{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use VPG to Play CartPole-v0\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "\n",
    "# Log DEBUG and above to stdout, with HH:MM:SS timestamps, so the\n",
    "# messages appear in the cell output.\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:53:56 [INFO] env: <CartPoleEnv<CartPole-v0>>\n",
      "22:53:56 [INFO] action_space: Discrete(2)\n",
      "22:53:56 [INFO] observation_space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)\n",
      "22:53:56 [INFO] reward_range: (-inf, inf)\n",
      "22:53:56 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "22:53:56 [INFO] _max_episode_steps: 200\n",
      "22:53:56 [INFO] _elapsed_steps: None\n",
      "22:53:56 [INFO] id: CartPole-v0\n",
      "22:53:56 [INFO] entry_point: gym.envs.classic_control:CartPoleEnv\n",
      "22:53:56 [INFO] reward_threshold: 195.0\n",
      "22:53:56 [INFO] nondeterministic: False\n",
      "22:53:56 [INFO] max_episode_steps: 200\n",
      "22:53:56 [INFO] _kwargs: {}\n",
      "22:53:56 [INFO] _env_name: CartPole\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('CartPole-v0')\n",
    "env.seed(0)\n",
    "\n",
    "# Log every attribute of the environment object, then of its spec.\n",
    "for source in (env, env.spec):\n",
    "    for key, value in vars(source).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class VPGAgent:\n",
    "    \"\"\"Vanilla policy gradient agent with a softmax policy network.\n",
    "\n",
    "    While in 'train' mode, every call to `step()` is recorded; `close()`\n",
    "    then performs one gradient update from the recorded episode.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env, gamma=0.99, learning_rate=0.005):\n",
    "        \"\"\"Build the policy network and its Adam optimizer.\n",
    "\n",
    "        Args:\n",
    "            env: object exposing `action_space.n` and\n",
    "                `observation_space.shape`.\n",
    "            gamma: discount factor used by `learn()`.\n",
    "            learning_rate: learning rate passed to Adam.\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = gamma\n",
    "        # Defined up front so close() is safe even if reset() was never called.\n",
    "        self.mode = None\n",
    "        self.trajectory = []\n",
    "\n",
    "        self.policy_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[],\n",
    "                output_size=self.action_n, output_activator=nn.Softmax(1))\n",
    "        self.optimizer = optim.Adam(self.policy_net.parameters(),\n",
    "                lr=learning_rate)\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size,\n",
    "            output_activator=None, use_bias=False):\n",
    "        \"\"\"Create a fully connected network with ReLU between linear layers.\n",
    "\n",
    "        Args:\n",
    "            input_size: number of input features.\n",
    "            hidden_sizes: sequence of hidden layer widths (may be empty).\n",
    "            output_size: number of output units.\n",
    "            output_activator: optional module appended after the last\n",
    "                linear layer.\n",
    "            use_bias: whether the linear layers carry a bias term.\n",
    "\n",
    "        Returns:\n",
    "            The layers wrapped in an `nn.Sequential`.\n",
    "        \"\"\"\n",
    "        sizes = [input_size] + list(hidden_sizes) + [output_size]\n",
    "        layers = []\n",
    "        for in_features, out_features in zip(sizes[:-1], sizes[1:]):\n",
    "            layers.append(nn.Linear(in_features, out_features, bias=use_bias))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers = layers[:-1]  # no ReLU after the final linear layer\n",
    "        if output_activator is not None:\n",
    "            layers.append(output_activator)\n",
    "        return nn.Sequential(*layers)\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        \"\"\"Set the running mode; in 'train' mode start an empty trajectory.\"\"\"\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        \"\"\"Sample an action from the policy for `observation`.\n",
    "\n",
    "        In 'train' mode, `observation, reward, done, action` are appended\n",
    "        flat to `self.trajectory` (four entries per step).\n",
    "\n",
    "        Returns:\n",
    "            The sampled action index.\n",
    "        \"\"\"\n",
    "        state_tensor = torch.as_tensor(observation, dtype=torch.float).unsqueeze(0)\n",
    "        # Acting needs no autograd graph: learn() recomputes the probabilities.\n",
    "        with torch.no_grad():\n",
    "            prob_tensor = self.policy_net(state_tensor)\n",
    "            action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "        action = action_tensor.numpy()[0]\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        \"\"\"End the episode; in 'train' mode run one learning update.\"\"\"\n",
    "        if self.mode == 'train':\n",
    "            self.learn()\n",
    "\n",
    "    def learn(self):\n",
    "        \"\"\"Take one policy-gradient step on the recorded trajectory.\n",
    "\n",
    "        Each log pi(a_t|s_t) is weighted by the reverse cumulative sum of\n",
    "        gamma**k * r_k over the recorded steps k >= t; the loss is the\n",
    "        negated mean of these products. Does nothing if no step was recorded.\n",
    "        \"\"\"\n",
    "        if not self.trajectory:\n",
    "            return  # nothing recorded, nothing to learn from\n",
    "        # Stack through numpy first: converting a list of arrays directly is slow.\n",
    "        state_tensor = torch.as_tensor(np.array(self.trajectory[0::4]),\n",
    "                dtype=torch.float)\n",
    "        reward_tensor = torch.as_tensor(self.trajectory[1::4], dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(np.array(self.trajectory[3::4]),\n",
    "                dtype=torch.long)\n",
    "        arange_tensor = torch.arange(state_tensor.shape[0], dtype=torch.float)\n",
    "        discount_tensor = self.gamma ** arange_tensor\n",
    "        discounted_reward_tensor = discount_tensor * reward_tensor\n",
    "        # flip -> cumsum -> flip yields the sum from each step to the end.\n",
    "        discounted_return_tensor = discounted_reward_tensor.flip(0).cumsum(0).flip(0)\n",
    "        all_pi_tensor = self.policy_net(state_tensor)\n",
    "        pi_tensor = torch.gather(all_pi_tensor, 1,\n",
    "                action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        # Clamp keeps log() finite if a probability underflows to zero.\n",
    "        log_pi_tensor = torch.log(torch.clamp(pi_tensor, 1e-6, 1.))\n",
    "        loss_tensor = -(discounted_return_tensor * log_pi_tensor).mean()\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "\n",
    "# Create the agent for the CartPole environment built in the earlier cell.\n",
    "agent = VPGAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:53:56 [INFO] ==== train ====\n",
      "22:53:56 [DEBUG] train episode 0: reward = 72.00, steps = 72\n",
      "22:53:56 [DEBUG] train episode 1: reward = 50.00, steps = 50\n",
      "22:53:56 [DEBUG] train episode 2: reward = 40.00, steps = 40\n",
      "22:53:56 [DEBUG] train episode 3: reward = 46.00, steps = 46\n",
      "22:53:56 [DEBUG] train episode 4: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 5: reward = 128.00, steps = 128\n",
      "22:53:56 [DEBUG] train episode 6: reward = 74.00, steps = 74\n",
      "22:53:56 [DEBUG] train episode 7: reward = 53.00, steps = 53\n",
      "22:53:56 [DEBUG] train episode 8: reward = 29.00, steps = 29\n",
      "22:53:56 [DEBUG] train episode 9: reward = 44.00, steps = 44\n",
      "22:53:56 [DEBUG] train episode 10: reward = 75.00, steps = 75\n",
      "22:53:56 [DEBUG] train episode 11: reward = 34.00, steps = 34\n",
      "22:53:56 [DEBUG] train episode 12: reward = 69.00, steps = 69\n",
      "22:53:56 [DEBUG] train episode 13: reward = 18.00, steps = 18\n",
      "22:53:56 [DEBUG] train episode 14: reward = 19.00, steps = 19\n",
      "22:53:56 [DEBUG] train episode 15: reward = 11.00, steps = 11\n",
      "22:53:56 [DEBUG] train episode 16: reward = 19.00, steps = 19\n",
      "22:53:56 [DEBUG] train episode 17: reward = 27.00, steps = 27\n",
      "22:53:56 [DEBUG] train episode 18: reward = 17.00, steps = 17\n",
      "22:53:56 [DEBUG] train episode 19: reward = 47.00, steps = 47\n",
      "22:53:56 [DEBUG] train episode 20: reward = 36.00, steps = 36\n",
      "22:53:56 [DEBUG] train episode 21: reward = 42.00, steps = 42\n",
      "22:53:56 [DEBUG] train episode 22: reward = 52.00, steps = 52\n",
      "22:53:56 [DEBUG] train episode 23: reward = 16.00, steps = 16\n",
      "22:53:56 [DEBUG] train episode 24: reward = 26.00, steps = 26\n",
      "22:53:56 [DEBUG] train episode 25: reward = 50.00, steps = 50\n",
      "22:53:56 [DEBUG] train episode 26: reward = 20.00, steps = 20\n",
      "22:53:56 [DEBUG] train episode 27: reward = 36.00, steps = 36\n",
      "22:53:56 [DEBUG] train episode 28: reward = 22.00, steps = 22\n",
      "22:53:56 [DEBUG] train episode 29: reward = 76.00, steps = 76\n",
      "22:53:56 [DEBUG] train episode 30: reward = 12.00, steps = 12\n",
      "22:53:56 [DEBUG] train episode 31: reward = 26.00, steps = 26\n",
      "22:53:56 [DEBUG] train episode 32: reward = 84.00, steps = 84\n",
      "22:53:56 [DEBUG] train episode 33: reward = 48.00, steps = 48\n",
      "22:53:56 [DEBUG] train episode 34: reward = 18.00, steps = 18\n",
      "22:53:56 [DEBUG] train episode 35: reward = 55.00, steps = 55\n",
      "22:53:56 [DEBUG] train episode 36: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 37: reward = 23.00, steps = 23\n",
      "22:53:56 [DEBUG] train episode 38: reward = 24.00, steps = 24\n",
      "22:53:56 [DEBUG] train episode 39: reward = 21.00, steps = 21\n",
      "22:53:56 [DEBUG] train episode 40: reward = 21.00, steps = 21\n",
      "22:53:56 [DEBUG] train episode 41: reward = 55.00, steps = 55\n",
      "22:53:56 [DEBUG] train episode 42: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 43: reward = 78.00, steps = 78\n",
      "22:53:57 [DEBUG] train episode 44: reward = 30.00, steps = 30\n",
      "22:53:57 [DEBUG] train episode 45: reward = 20.00, steps = 20\n",
      "22:53:57 [DEBUG] train episode 46: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 47: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 48: reward = 24.00, steps = 24\n",
      "22:53:57 [DEBUG] train episode 49: reward = 15.00, steps = 15\n",
      "22:53:57 [DEBUG] train episode 50: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 51: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 52: reward = 55.00, steps = 55\n",
      "22:53:57 [DEBUG] train episode 53: reward = 24.00, steps = 24\n",
      "22:53:57 [DEBUG] train episode 54: reward = 14.00, steps = 14\n",
      "22:53:57 [DEBUG] train episode 55: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 56: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 57: reward = 50.00, steps = 50\n",
      "22:53:57 [DEBUG] train episode 58: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 59: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 60: reward = 31.00, steps = 31\n",
      "22:53:57 [DEBUG] train episode 61: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 62: reward = 86.00, steps = 86\n",
      "22:53:57 [DEBUG] train episode 63: reward = 41.00, steps = 41\n",
      "22:53:57 [DEBUG] train episode 64: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 65: reward = 57.00, steps = 57\n",
      "22:53:57 [DEBUG] train episode 66: reward = 51.00, steps = 51\n",
      "22:53:57 [DEBUG] train episode 67: reward = 76.00, steps = 76\n",
      "22:53:57 [DEBUG] train episode 68: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 69: reward = 38.00, steps = 38\n",
      "22:53:57 [DEBUG] train episode 70: reward = 31.00, steps = 31\n",
      "22:53:57 [DEBUG] train episode 71: reward = 60.00, steps = 60\n",
      "22:53:57 [DEBUG] train episode 72: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 73: reward = 105.00, steps = 105\n",
      "22:53:57 [DEBUG] train episode 74: reward = 44.00, steps = 44\n",
      "22:53:57 [DEBUG] train episode 75: reward = 37.00, steps = 37\n",
      "22:53:57 [DEBUG] train episode 76: reward = 29.00, steps = 29\n",
      "22:53:57 [DEBUG] train episode 77: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 78: reward = 56.00, steps = 56\n",
      "22:53:57 [DEBUG] train episode 79: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 80: reward = 90.00, steps = 90\n",
      "22:53:57 [DEBUG] train episode 81: reward = 40.00, steps = 40\n",
      "22:53:57 [DEBUG] train episode 82: reward = 34.00, steps = 34\n",
      "22:53:57 [DEBUG] train episode 83: reward = 62.00, steps = 62\n",
      "22:53:57 [DEBUG] train episode 84: reward = 36.00, steps = 36\n",
      "22:53:57 [DEBUG] train episode 85: reward = 65.00, steps = 65\n",
      "22:53:57 [DEBUG] train episode 86: reward = 66.00, steps = 66\n",
      "22:53:57 [DEBUG] train episode 87: reward = 53.00, steps = 53\n",
      "22:53:57 [DEBUG] train episode 88: reward = 21.00, steps = 21\n",
      "22:53:57 [DEBUG] train episode 89: reward = 57.00, steps = 57\n",
      "22:53:57 [DEBUG] train episode 90: reward = 89.00, steps = 89\n",
      "22:53:57 [DEBUG] train episode 91: reward = 65.00, steps = 65\n",
      "22:53:57 [DEBUG] train episode 92: reward = 33.00, steps = 33\n",
      "22:53:57 [DEBUG] train episode 93: reward = 29.00, steps = 29\n",
      "22:53:57 [DEBUG] train episode 94: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 95: reward = 53.00, steps = 53\n",
      "22:53:57 [DEBUG] train episode 96: reward = 109.00, steps = 109\n",
      "22:53:57 [DEBUG] train episode 97: reward = 32.00, steps = 32\n",
      "22:53:57 [DEBUG] train episode 98: reward = 74.00, steps = 74\n",
      "22:53:57 [DEBUG] train episode 99: reward = 11.00, steps = 11\n",
      "22:53:57 [DEBUG] train episode 100: reward = 42.00, steps = 42\n",
      "22:53:57 [DEBUG] train episode 101: reward = 32.00, steps = 32\n",
      "22:53:57 [DEBUG] train episode 102: reward = 18.00, steps = 18\n",
      "22:53:57 [DEBUG] train episode 103: reward = 26.00, steps = 26\n",
      "22:53:57 [DEBUG] train episode 104: reward = 46.00, steps = 46\n",
      "22:53:57 [DEBUG] train episode 105: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 106: reward = 79.00, steps = 79\n",
      "22:53:58 [DEBUG] train episode 107: reward = 66.00, steps = 66\n",
      "22:53:58 [DEBUG] train episode 108: reward = 35.00, steps = 35\n",
      "22:53:58 [DEBUG] train episode 109: reward = 36.00, steps = 36\n",
      "22:53:58 [DEBUG] train episode 110: reward = 58.00, steps = 58\n",
      "22:53:58 [DEBUG] train episode 111: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 112: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 113: reward = 21.00, steps = 21\n",
      "22:53:58 [DEBUG] train episode 114: reward = 46.00, steps = 46\n",
      "22:53:58 [DEBUG] train episode 115: reward = 42.00, steps = 42\n",
      "22:53:58 [DEBUG] train episode 116: reward = 64.00, steps = 64\n",
      "22:53:58 [DEBUG] train episode 117: reward = 86.00, steps = 86\n",
      "22:53:58 [DEBUG] train episode 118: reward = 55.00, steps = 55\n",
      "22:53:58 [DEBUG] train episode 119: reward = 64.00, steps = 64\n",
      "22:53:58 [DEBUG] train episode 120: reward = 53.00, steps = 53\n",
      "22:53:58 [DEBUG] train episode 121: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 122: reward = 18.00, steps = 18\n",
      "22:53:58 [DEBUG] train episode 123: reward = 58.00, steps = 58\n",
      "22:53:58 [DEBUG] train episode 124: reward = 22.00, steps = 22\n",
      "22:53:58 [DEBUG] train episode 125: reward = 78.00, steps = 78\n",
      "22:53:58 [DEBUG] train episode 126: reward = 21.00, steps = 21\n",
      "22:53:58 [DEBUG] train episode 127: reward = 77.00, steps = 77\n",
      "22:53:58 [DEBUG] train episode 128: reward = 59.00, steps = 59\n",
      "22:53:58 [DEBUG] train episode 129: reward = 27.00, steps = 27\n",
      "22:53:58 [DEBUG] train episode 130: reward = 91.00, steps = 91\n",
      "22:53:58 [DEBUG] train episode 131: reward = 74.00, steps = 74\n",
      "22:53:58 [DEBUG] train episode 132: reward = 57.00, steps = 57\n",
      "22:53:58 [DEBUG] train episode 133: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 134: reward = 19.00, steps = 19\n",
      "22:53:58 [DEBUG] train episode 135: reward = 105.00, steps = 105\n",
      "22:53:58 [DEBUG] train episode 136: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 137: reward = 39.00, steps = 39\n",
      "22:53:58 [DEBUG] train episode 138: reward = 52.00, steps = 52\n",
      "22:53:58 [DEBUG] train episode 139: reward = 45.00, steps = 45\n",
      "22:53:58 [DEBUG] train episode 140: reward = 48.00, steps = 48\n",
      "22:53:58 [DEBUG] train episode 141: reward = 44.00, steps = 44\n",
      "22:53:58 [DEBUG] train episode 142: reward = 88.00, steps = 88\n",
      "22:53:58 [DEBUG] train episode 143: reward = 32.00, steps = 32\n",
      "22:53:58 [DEBUG] train episode 144: reward = 50.00, steps = 50\n",
      "22:53:58 [DEBUG] train episode 145: reward = 34.00, steps = 34\n",
      "22:53:58 [DEBUG] train episode 146: reward = 46.00, steps = 46\n",
      "22:53:58 [DEBUG] train episode 147: reward = 45.00, steps = 45\n",
      "22:53:58 [DEBUG] train episode 148: reward = 38.00, steps = 38\n",
      "22:53:58 [DEBUG] train episode 149: reward = 31.00, steps = 31\n",
      "22:53:58 [DEBUG] train episode 150: reward = 47.00, steps = 47\n",
      "22:53:58 [DEBUG] train episode 151: reward = 76.00, steps = 76\n",
      "22:53:58 [DEBUG] train episode 152: reward = 27.00, steps = 27\n",
      "22:53:58 [DEBUG] train episode 153: reward = 30.00, steps = 30\n",
      "22:53:58 [DEBUG] train episode 154: reward = 118.00, steps = 118\n",
      "22:53:58 [DEBUG] train episode 155: reward = 97.00, steps = 97\n",
      "22:53:58 [DEBUG] train episode 156: reward = 130.00, steps = 130\n",
      "22:53:58 [DEBUG] train episode 157: reward = 35.00, steps = 35\n",
      "22:53:59 [DEBUG] train episode 158: reward = 112.00, steps = 112\n",
      "22:53:59 [DEBUG] train episode 159: reward = 18.00, steps = 18\n",
      "22:53:59 [DEBUG] train episode 160: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 161: reward = 62.00, steps = 62\n",
      "22:53:59 [DEBUG] train episode 162: reward = 47.00, steps = 47\n",
      "22:53:59 [DEBUG] train episode 163: reward = 87.00, steps = 87\n",
      "22:53:59 [DEBUG] train episode 164: reward = 123.00, steps = 123\n",
      "22:53:59 [DEBUG] train episode 165: reward = 35.00, steps = 35\n",
      "22:53:59 [DEBUG] train episode 166: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 167: reward = 92.00, steps = 92\n",
      "22:53:59 [DEBUG] train episode 168: reward = 13.00, steps = 13\n",
      "22:53:59 [DEBUG] train episode 169: reward = 33.00, steps = 33\n",
      "22:53:59 [DEBUG] train episode 170: reward = 20.00, steps = 20\n",
      "22:53:59 [DEBUG] train episode 171: reward = 19.00, steps = 19\n",
      "22:53:59 [DEBUG] train episode 172: reward = 80.00, steps = 80\n",
      "22:53:59 [DEBUG] train episode 173: reward = 31.00, steps = 31\n",
      "22:53:59 [DEBUG] train episode 174: reward = 32.00, steps = 32\n",
      "22:53:59 [DEBUG] train episode 175: reward = 28.00, steps = 28\n",
      "22:53:59 [DEBUG] train episode 176: reward = 63.00, steps = 63\n",
      "22:53:59 [DEBUG] train episode 177: reward = 13.00, steps = 13\n",
      "22:53:59 [DEBUG] train episode 178: reward = 36.00, steps = 36\n",
      "22:53:59 [DEBUG] train episode 179: reward = 74.00, steps = 74\n",
      "22:53:59 [DEBUG] train episode 180: reward = 51.00, steps = 51\n",
      "22:53:59 [DEBUG] train episode 181: reward = 17.00, steps = 17\n",
      "22:53:59 [DEBUG] train episode 182: reward = 39.00, steps = 39\n",
      "22:53:59 [DEBUG] train episode 183: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 184: reward = 26.00, steps = 26\n",
      "22:53:59 [DEBUG] train episode 185: reward = 56.00, steps = 56\n",
      "22:53:59 [DEBUG] train episode 186: reward = 79.00, steps = 79\n",
      "22:53:59 [DEBUG] train episode 187: reward = 40.00, steps = 40\n",
      "22:53:59 [DEBUG] train episode 188: reward = 11.00, steps = 11\n",
      "22:53:59 [DEBUG] train episode 189: reward = 23.00, steps = 23\n",
      "22:53:59 [DEBUG] train episode 190: reward = 22.00, steps = 22\n",
      "22:53:59 [DEBUG] train episode 191: reward = 82.00, steps = 82\n",
      "22:53:59 [DEBUG] train episode 192: reward = 48.00, steps = 48\n",
      "22:53:59 [DEBUG] train episode 193: reward = 23.00, steps = 23\n",
      "22:53:59 [DEBUG] train episode 194: reward = 110.00, steps = 110\n",
      "22:53:59 [DEBUG] train episode 195: reward = 32.00, steps = 32\n",
      "22:53:59 [DEBUG] train episode 196: reward = 67.00, steps = 67\n",
      "22:53:59 [DEBUG] train episode 197: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 198: reward = 54.00, steps = 54\n",
      "22:53:59 [DEBUG] train episode 199: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 200: reward = 28.00, steps = 28\n",
      "22:53:59 [DEBUG] train episode 201: reward = 126.00, steps = 126\n",
      "22:53:59 [DEBUG] train episode 202: reward = 25.00, steps = 25\n",
      "22:53:59 [DEBUG] train episode 203: reward = 78.00, steps = 78\n",
      "22:53:59 [DEBUG] train episode 204: reward = 27.00, steps = 27\n",
      "22:53:59 [DEBUG] train episode 205: reward = 65.00, steps = 65\n",
      "22:53:59 [DEBUG] train episode 206: reward = 29.00, steps = 29\n",
      "22:53:59 [DEBUG] train episode 207: reward = 75.00, steps = 75\n",
      "22:53:59 [DEBUG] train episode 208: reward = 44.00, steps = 44\n",
      "22:53:59 [DEBUG] train episode 209: reward = 34.00, steps = 34\n",
      "22:53:59 [DEBUG] train episode 210: reward = 52.00, steps = 52\n",
      "22:53:59 [DEBUG] train episode 211: reward = 62.00, steps = 62\n",
      "22:54:00 [DEBUG] train episode 212: reward = 97.00, steps = 97\n",
      "22:54:00 [DEBUG] train episode 213: reward = 67.00, steps = 67\n",
      "22:54:00 [DEBUG] train episode 214: reward = 29.00, steps = 29\n",
      "22:54:00 [DEBUG] train episode 215: reward = 65.00, steps = 65\n",
      "22:54:00 [DEBUG] train episode 216: reward = 32.00, steps = 32\n",
      "22:54:00 [DEBUG] train episode 217: reward = 44.00, steps = 44\n",
      "22:54:00 [DEBUG] train episode 218: reward = 58.00, steps = 58\n",
      "22:54:00 [DEBUG] train episode 219: reward = 47.00, steps = 47\n",
      "22:54:00 [DEBUG] train episode 220: reward = 76.00, steps = 76\n",
      "22:54:00 [DEBUG] train episode 221: reward = 68.00, steps = 68\n",
      "22:54:00 [DEBUG] train episode 222: reward = 27.00, steps = 27\n",
      "22:54:00 [DEBUG] train episode 223: reward = 106.00, steps = 106\n",
      "22:54:00 [DEBUG] train episode 224: reward = 96.00, steps = 96\n",
      "22:54:00 [DEBUG] train episode 225: reward = 74.00, steps = 74\n",
      "22:54:00 [DEBUG] train episode 226: reward = 91.00, steps = 91\n",
      "22:54:00 [DEBUG] train episode 227: reward = 80.00, steps = 80\n",
      "22:54:00 [DEBUG] train episode 228: reward = 42.00, steps = 42\n",
      "22:54:00 [DEBUG] train episode 229: reward = 31.00, steps = 31\n",
      "22:54:00 [DEBUG] train episode 230: reward = 46.00, steps = 46\n",
      "22:54:00 [DEBUG] train episode 231: reward = 44.00, steps = 44\n",
      "22:54:00 [DEBUG] train episode 232: reward = 86.00, steps = 86\n",
      "22:54:00 [DEBUG] train episode 233: reward = 69.00, steps = 69\n",
      "22:54:00 [DEBUG] train episode 234: reward = 26.00, steps = 26\n",
      "22:54:00 [DEBUG] train episode 235: reward = 40.00, steps = 40\n",
      "22:54:00 [DEBUG] train episode 236: reward = 42.00, steps = 42\n",
      "22:54:00 [DEBUG] train episode 237: reward = 43.00, steps = 43\n",
      "22:54:00 [DEBUG] train episode 238: reward = 39.00, steps = 39\n",
      "22:54:00 [DEBUG] train episode 239: reward = 76.00, steps = 76\n",
      "22:54:00 [DEBUG] train episode 240: reward = 122.00, steps = 122\n",
      "22:54:00 [DEBUG] train episode 241: reward = 105.00, steps = 105\n",
      "22:54:00 [DEBUG] train episode 242: reward = 104.00, steps = 104\n",
      "22:54:00 [DEBUG] train episode 243: reward = 32.00, steps = 32\n",
      "22:54:00 [DEBUG] train episode 244: reward = 104.00, steps = 104\n",
      "22:54:00 [DEBUG] train episode 245: reward = 79.00, steps = 79\n",
      "22:54:00 [DEBUG] train episode 246: reward = 43.00, steps = 43\n",
      "22:54:00 [DEBUG] train episode 247: reward = 26.00, steps = 26\n",
      "22:54:00 [DEBUG] train episode 248: reward = 29.00, steps = 29\n",
      "22:54:00 [DEBUG] train episode 249: reward = 22.00, steps = 22\n",
      "22:54:00 [DEBUG] train episode 250: reward = 35.00, steps = 35\n",
      "22:54:00 [DEBUG] train episode 251: reward = 67.00, steps = 67\n",
      "22:54:00 [DEBUG] train episode 252: reward = 47.00, steps = 47\n",
      "22:54:00 [DEBUG] train episode 253: reward = 51.00, steps = 51\n",
      "22:54:00 [DEBUG] train episode 254: reward = 30.00, steps = 30\n",
      "22:54:00 [DEBUG] train episode 255: reward = 111.00, steps = 111\n",
      "22:54:01 [DEBUG] train episode 256: reward = 94.00, steps = 94\n",
      "22:54:01 [DEBUG] train episode 257: reward = 31.00, steps = 31\n",
      "22:54:01 [DEBUG] train episode 258: reward = 65.00, steps = 65\n",
      "22:54:01 [DEBUG] train episode 259: reward = 31.00, steps = 31\n",
      "22:54:01 [DEBUG] train episode 260: reward = 56.00, steps = 56\n",
      "22:54:01 [DEBUG] train episode 261: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 262: reward = 38.00, steps = 38\n",
      "22:54:01 [DEBUG] train episode 263: reward = 47.00, steps = 47\n",
      "22:54:01 [DEBUG] train episode 264: reward = 107.00, steps = 107\n",
      "22:54:01 [DEBUG] train episode 265: reward = 106.00, steps = 106\n",
      "22:54:01 [DEBUG] train episode 266: reward = 118.00, steps = 118\n",
      "22:54:01 [DEBUG] train episode 267: reward = 171.00, steps = 171\n",
      "22:54:01 [DEBUG] train episode 268: reward = 63.00, steps = 63\n",
      "22:54:01 [DEBUG] train episode 269: reward = 27.00, steps = 27\n",
      "22:54:01 [DEBUG] train episode 270: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 271: reward = 24.00, steps = 24\n",
      "22:54:01 [DEBUG] train episode 272: reward = 43.00, steps = 43\n",
      "22:54:01 [DEBUG] train episode 273: reward = 29.00, steps = 29\n",
      "22:54:01 [DEBUG] train episode 274: reward = 57.00, steps = 57\n",
      "22:54:01 [DEBUG] train episode 275: reward = 67.00, steps = 67\n",
      "22:54:01 [DEBUG] train episode 276: reward = 57.00, steps = 57\n",
      "22:54:01 [DEBUG] train episode 277: reward = 42.00, steps = 42\n",
      "22:54:01 [DEBUG] train episode 278: reward = 32.00, steps = 32\n",
      "22:54:01 [DEBUG] train episode 279: reward = 48.00, steps = 48\n",
      "22:54:01 [DEBUG] train episode 280: reward = 62.00, steps = 62\n",
      "22:54:01 [DEBUG] train episode 281: reward = 62.00, steps = 62\n",
      "22:54:01 [DEBUG] train episode 282: reward = 112.00, steps = 112\n",
      "22:54:01 [DEBUG] train episode 283: reward = 88.00, steps = 88\n",
      "22:54:01 [DEBUG] train episode 284: reward = 19.00, steps = 19\n",
      "22:54:01 [DEBUG] train episode 285: reward = 59.00, steps = 59\n",
      "22:54:01 [DEBUG] train episode 286: reward = 39.00, steps = 39\n",
      "22:54:01 [DEBUG] train episode 287: reward = 50.00, steps = 50\n",
      "22:54:01 [DEBUG] train episode 288: reward = 55.00, steps = 55\n",
      "22:54:01 [DEBUG] train episode 289: reward = 85.00, steps = 85\n",
      "22:54:01 [DEBUG] train episode 290: reward = 32.00, steps = 32\n",
      "22:54:01 [DEBUG] train episode 291: reward = 24.00, steps = 24\n",
      "22:54:01 [DEBUG] train episode 292: reward = 68.00, steps = 68\n",
      "22:54:01 [DEBUG] train episode 293: reward = 103.00, steps = 103\n",
      "22:54:01 [DEBUG] train episode 294: reward = 23.00, steps = 23\n",
      "22:54:01 [DEBUG] train episode 295: reward = 48.00, steps = 48\n",
      "22:54:01 [DEBUG] train episode 296: reward = 28.00, steps = 28\n",
      "22:54:01 [DEBUG] train episode 297: reward = 54.00, steps = 54\n",
      "22:54:01 [DEBUG] train episode 298: reward = 178.00, steps = 178\n",
      "22:54:01 [DEBUG] train episode 299: reward = 114.00, steps = 114\n",
      "22:54:01 [DEBUG] train episode 300: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 301: reward = 39.00, steps = 39\n",
      "22:54:02 [DEBUG] train episode 302: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 303: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 304: reward = 51.00, steps = 51\n",
      "22:54:02 [DEBUG] train episode 305: reward = 152.00, steps = 152\n",
      "22:54:02 [DEBUG] train episode 306: reward = 27.00, steps = 27\n",
      "22:54:02 [DEBUG] train episode 307: reward = 26.00, steps = 26\n",
      "22:54:02 [DEBUG] train episode 308: reward = 103.00, steps = 103\n",
      "22:54:02 [DEBUG] train episode 309: reward = 58.00, steps = 58\n",
      "22:54:02 [DEBUG] train episode 310: reward = 108.00, steps = 108\n",
      "22:54:02 [DEBUG] train episode 311: reward = 62.00, steps = 62\n",
      "22:54:02 [DEBUG] train episode 312: reward = 36.00, steps = 36\n",
      "22:54:02 [DEBUG] train episode 313: reward = 47.00, steps = 47\n",
      "22:54:02 [DEBUG] train episode 314: reward = 96.00, steps = 96\n",
      "22:54:02 [DEBUG] train episode 315: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 316: reward = 53.00, steps = 53\n",
      "22:54:02 [DEBUG] train episode 317: reward = 66.00, steps = 66\n",
      "22:54:02 [DEBUG] train episode 318: reward = 31.00, steps = 31\n",
      "22:54:02 [DEBUG] train episode 319: reward = 76.00, steps = 76\n",
      "22:54:02 [DEBUG] train episode 320: reward = 55.00, steps = 55\n",
      "22:54:02 [DEBUG] train episode 321: reward = 75.00, steps = 75\n",
      "22:54:02 [DEBUG] train episode 322: reward = 54.00, steps = 54\n",
      "22:54:02 [DEBUG] train episode 323: reward = 26.00, steps = 26\n",
      "22:54:02 [DEBUG] train episode 324: reward = 95.00, steps = 95\n",
      "22:54:02 [DEBUG] train episode 325: reward = 34.00, steps = 34\n",
      "22:54:02 [DEBUG] train episode 326: reward = 92.00, steps = 92\n",
      "22:54:02 [DEBUG] train episode 327: reward = 85.00, steps = 85\n",
      "22:54:02 [DEBUG] train episode 328: reward = 63.00, steps = 63\n",
      "22:54:02 [DEBUG] train episode 329: reward = 80.00, steps = 80\n",
      "22:54:02 [DEBUG] train episode 330: reward = 14.00, steps = 14\n",
      "22:54:02 [DEBUG] train episode 331: reward = 29.00, steps = 29\n",
      "22:54:02 [DEBUG] train episode 332: reward = 62.00, steps = 62\n",
      "22:54:02 [DEBUG] train episode 333: reward = 41.00, steps = 41\n",
      "22:54:02 [DEBUG] train episode 334: reward = 192.00, steps = 192\n",
      "22:54:02 [DEBUG] train episode 335: reward = 96.00, steps = 96\n",
      "22:54:02 [DEBUG] train episode 336: reward = 63.00, steps = 63\n",
      "22:54:02 [DEBUG] train episode 337: reward = 53.00, steps = 53\n",
      "22:54:02 [DEBUG] train episode 338: reward = 86.00, steps = 86\n",
      "22:54:02 [DEBUG] train episode 339: reward = 78.00, steps = 78\n",
      "22:54:02 [DEBUG] train episode 340: reward = 74.00, steps = 74\n",
      "22:54:02 [DEBUG] train episode 341: reward = 23.00, steps = 23\n",
      "22:54:02 [DEBUG] train episode 342: reward = 100.00, steps = 100\n",
      "22:54:02 [DEBUG] train episode 343: reward = 72.00, steps = 72\n",
      "22:54:02 [DEBUG] train episode 344: reward = 27.00, steps = 27\n",
      "22:54:02 [DEBUG] train episode 345: reward = 57.00, steps = 57\n",
      "22:54:02 [DEBUG] train episode 346: reward = 28.00, steps = 28\n",
      "22:54:02 [DEBUG] train episode 347: reward = 30.00, steps = 30\n",
      "22:54:03 [DEBUG] train episode 348: reward = 78.00, steps = 78\n",
      "22:54:03 [DEBUG] train episode 349: reward = 29.00, steps = 29\n",
      "22:54:03 [DEBUG] train episode 350: reward = 12.00, steps = 12\n",
      "22:54:03 [DEBUG] train episode 351: reward = 200.00, steps = 200\n",
      "22:54:03 [DEBUG] train episode 352: reward = 61.00, steps = 61\n",
      "22:54:03 [DEBUG] train episode 353: reward = 54.00, steps = 54\n",
      "22:54:03 [DEBUG] train episode 354: reward = 156.00, steps = 156\n",
      "22:54:03 [DEBUG] train episode 355: reward = 41.00, steps = 41\n",
      "22:54:03 [DEBUG] train episode 356: reward = 47.00, steps = 47\n",
      "22:54:03 [DEBUG] train episode 357: reward = 61.00, steps = 61\n",
      "22:54:03 [DEBUG] train episode 358: reward = 25.00, steps = 25\n",
      "22:54:03 [DEBUG] train episode 359: reward = 30.00, steps = 30\n",
      "22:54:03 [DEBUG] train episode 360: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 361: reward = 64.00, steps = 64\n",
      "22:54:03 [DEBUG] train episode 362: reward = 174.00, steps = 174\n",
      "22:54:03 [DEBUG] train episode 363: reward = 70.00, steps = 70\n",
      "22:54:03 [DEBUG] train episode 364: reward = 93.00, steps = 93\n",
      "22:54:03 [DEBUG] train episode 365: reward = 128.00, steps = 128\n",
      "22:54:03 [DEBUG] train episode 366: reward = 80.00, steps = 80\n",
      "22:54:03 [DEBUG] train episode 367: reward = 36.00, steps = 36\n",
      "22:54:03 [DEBUG] train episode 368: reward = 49.00, steps = 49\n",
      "22:54:03 [DEBUG] train episode 369: reward = 49.00, steps = 49\n",
      "22:54:03 [DEBUG] train episode 370: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 371: reward = 69.00, steps = 69\n",
      "22:54:03 [DEBUG] train episode 372: reward = 92.00, steps = 92\n",
      "22:54:03 [DEBUG] train episode 373: reward = 79.00, steps = 79\n",
      "22:54:03 [DEBUG] train episode 374: reward = 74.00, steps = 74\n",
      "22:54:03 [DEBUG] train episode 375: reward = 165.00, steps = 165\n",
      "22:54:03 [DEBUG] train episode 376: reward = 77.00, steps = 77\n",
      "22:54:03 [DEBUG] train episode 377: reward = 84.00, steps = 84\n",
      "22:54:03 [DEBUG] train episode 378: reward = 53.00, steps = 53\n",
      "22:54:03 [DEBUG] train episode 379: reward = 35.00, steps = 35\n",
      "22:54:03 [DEBUG] train episode 380: reward = 35.00, steps = 35\n",
      "22:54:03 [DEBUG] train episode 381: reward = 67.00, steps = 67\n",
      "22:54:03 [DEBUG] train episode 382: reward = 84.00, steps = 84\n",
      "22:54:03 [DEBUG] train episode 383: reward = 98.00, steps = 98\n",
      "22:54:03 [DEBUG] train episode 384: reward = 73.00, steps = 73\n",
      "22:54:03 [DEBUG] train episode 385: reward = 74.00, steps = 74\n",
      "22:54:03 [DEBUG] train episode 386: reward = 41.00, steps = 41\n",
      "22:54:03 [DEBUG] train episode 387: reward = 68.00, steps = 68\n",
      "22:54:03 [DEBUG] train episode 388: reward = 47.00, steps = 47\n",
      "22:54:04 [DEBUG] train episode 389: reward = 49.00, steps = 49\n",
      "22:54:04 [DEBUG] train episode 390: reward = 106.00, steps = 106\n",
      "22:54:04 [DEBUG] train episode 391: reward = 29.00, steps = 29\n",
      "22:54:04 [DEBUG] train episode 392: reward = 91.00, steps = 91\n",
      "22:54:04 [DEBUG] train episode 393: reward = 155.00, steps = 155\n",
      "22:54:04 [DEBUG] train episode 394: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 395: reward = 84.00, steps = 84\n",
      "22:54:04 [DEBUG] train episode 396: reward = 82.00, steps = 82\n",
      "22:54:04 [DEBUG] train episode 397: reward = 22.00, steps = 22\n",
      "22:54:04 [DEBUG] train episode 398: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 399: reward = 29.00, steps = 29\n",
      "22:54:04 [DEBUG] train episode 400: reward = 25.00, steps = 25\n",
      "22:54:04 [DEBUG] train episode 401: reward = 88.00, steps = 88\n",
      "22:54:04 [DEBUG] train episode 402: reward = 26.00, steps = 26\n",
      "22:54:04 [DEBUG] train episode 403: reward = 32.00, steps = 32\n",
      "22:54:04 [DEBUG] train episode 404: reward = 77.00, steps = 77\n",
      "22:54:04 [DEBUG] train episode 405: reward = 163.00, steps = 163\n",
      "22:54:04 [DEBUG] train episode 406: reward = 117.00, steps = 117\n",
      "22:54:04 [DEBUG] train episode 407: reward = 80.00, steps = 80\n",
      "22:54:04 [DEBUG] train episode 408: reward = 53.00, steps = 53\n",
      "22:54:04 [DEBUG] train episode 409: reward = 58.00, steps = 58\n",
      "22:54:04 [DEBUG] train episode 410: reward = 31.00, steps = 31\n",
      "22:54:04 [DEBUG] train episode 411: reward = 57.00, steps = 57\n",
      "22:54:04 [DEBUG] train episode 412: reward = 132.00, steps = 132\n",
      "22:54:04 [DEBUG] train episode 413: reward = 44.00, steps = 44\n",
      "22:54:04 [DEBUG] train episode 414: reward = 87.00, steps = 87\n",
      "22:54:04 [DEBUG] train episode 415: reward = 41.00, steps = 41\n",
      "22:54:04 [DEBUG] train episode 416: reward = 73.00, steps = 73\n",
      "22:54:04 [DEBUG] train episode 417: reward = 137.00, steps = 137\n",
      "22:54:04 [DEBUG] train episode 418: reward = 97.00, steps = 97\n",
      "22:54:04 [DEBUG] train episode 419: reward = 116.00, steps = 116\n",
      "22:54:04 [DEBUG] train episode 420: reward = 27.00, steps = 27\n",
      "22:54:04 [DEBUG] train episode 421: reward = 34.00, steps = 34\n",
      "22:54:04 [DEBUG] train episode 422: reward = 108.00, steps = 108\n",
      "22:54:04 [DEBUG] train episode 423: reward = 200.00, steps = 200\n",
      "22:54:04 [DEBUG] train episode 424: reward = 34.00, steps = 34\n",
      "22:54:04 [DEBUG] train episode 425: reward = 34.00, steps = 34\n",
      "22:54:05 [DEBUG] train episode 426: reward = 115.00, steps = 115\n",
      "22:54:05 [DEBUG] train episode 427: reward = 55.00, steps = 55\n",
      "22:54:05 [DEBUG] train episode 428: reward = 119.00, steps = 119\n",
      "22:54:05 [DEBUG] train episode 429: reward = 74.00, steps = 74\n",
      "22:54:05 [DEBUG] train episode 430: reward = 42.00, steps = 42\n",
      "22:54:05 [DEBUG] train episode 431: reward = 102.00, steps = 102\n",
      "22:54:05 [DEBUG] train episode 432: reward = 119.00, steps = 119\n",
      "22:54:05 [DEBUG] train episode 433: reward = 32.00, steps = 32\n",
      "22:54:05 [DEBUG] train episode 434: reward = 104.00, steps = 104\n",
      "22:54:05 [DEBUG] train episode 435: reward = 95.00, steps = 95\n",
      "22:54:05 [DEBUG] train episode 436: reward = 73.00, steps = 73\n",
      "22:54:05 [DEBUG] train episode 437: reward = 24.00, steps = 24\n",
      "22:54:05 [DEBUG] train episode 438: reward = 81.00, steps = 81\n",
      "22:54:05 [DEBUG] train episode 439: reward = 130.00, steps = 130\n",
      "22:54:05 [DEBUG] train episode 440: reward = 20.00, steps = 20\n",
      "22:54:05 [DEBUG] train episode 441: reward = 134.00, steps = 134\n",
      "22:54:05 [DEBUG] train episode 442: reward = 161.00, steps = 161\n",
      "22:54:05 [DEBUG] train episode 443: reward = 200.00, steps = 200\n",
      "22:54:05 [DEBUG] train episode 444: reward = 57.00, steps = 57\n",
      "22:54:05 [DEBUG] train episode 445: reward = 86.00, steps = 86\n",
      "22:54:05 [DEBUG] train episode 446: reward = 31.00, steps = 31\n",
      "22:54:05 [DEBUG] train episode 447: reward = 114.00, steps = 114\n",
      "22:54:05 [DEBUG] train episode 448: reward = 12.00, steps = 12\n",
      "22:54:05 [DEBUG] train episode 449: reward = 43.00, steps = 43\n",
      "22:54:05 [DEBUG] train episode 450: reward = 114.00, steps = 114\n",
      "22:54:05 [DEBUG] train episode 451: reward = 20.00, steps = 20\n",
      "22:54:05 [DEBUG] train episode 452: reward = 72.00, steps = 72\n",
      "22:54:05 [DEBUG] train episode 453: reward = 61.00, steps = 61\n",
      "22:54:05 [DEBUG] train episode 454: reward = 116.00, steps = 116\n",
      "22:54:05 [DEBUG] train episode 455: reward = 101.00, steps = 101\n",
      "22:54:05 [DEBUG] train episode 456: reward = 22.00, steps = 22\n",
      "22:54:05 [DEBUG] train episode 457: reward = 75.00, steps = 75\n",
      "22:54:05 [DEBUG] train episode 458: reward = 28.00, steps = 28\n",
      "22:54:05 [DEBUG] train episode 459: reward = 131.00, steps = 131\n",
      "22:54:06 [DEBUG] train episode 460: reward = 87.00, steps = 87\n",
      "22:54:06 [DEBUG] train episode 461: reward = 163.00, steps = 163\n",
      "22:54:06 [DEBUG] train episode 462: reward = 135.00, steps = 135\n",
      "22:54:06 [DEBUG] train episode 463: reward = 191.00, steps = 191\n",
      "22:54:06 [DEBUG] train episode 464: reward = 22.00, steps = 22\n",
      "22:54:06 [DEBUG] train episode 465: reward = 142.00, steps = 142\n",
      "22:54:06 [DEBUG] train episode 466: reward = 46.00, steps = 46\n",
      "22:54:06 [DEBUG] train episode 467: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 468: reward = 18.00, steps = 18\n",
      "22:54:06 [DEBUG] train episode 469: reward = 125.00, steps = 125\n",
      "22:54:06 [DEBUG] train episode 470: reward = 101.00, steps = 101\n",
      "22:54:06 [DEBUG] train episode 471: reward = 146.00, steps = 146\n",
      "22:54:06 [DEBUG] train episode 472: reward = 37.00, steps = 37\n",
      "22:54:06 [DEBUG] train episode 473: reward = 104.00, steps = 104\n",
      "22:54:06 [DEBUG] train episode 474: reward = 75.00, steps = 75\n",
      "22:54:06 [DEBUG] train episode 475: reward = 44.00, steps = 44\n",
      "22:54:06 [DEBUG] train episode 476: reward = 123.00, steps = 123\n",
      "22:54:06 [DEBUG] train episode 477: reward = 34.00, steps = 34\n",
      "22:54:06 [DEBUG] train episode 478: reward = 13.00, steps = 13\n",
      "22:54:06 [DEBUG] train episode 479: reward = 110.00, steps = 110\n",
      "22:54:06 [DEBUG] train episode 480: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 481: reward = 86.00, steps = 86\n",
      "22:54:06 [DEBUG] train episode 482: reward = 91.00, steps = 91\n",
      "22:54:06 [DEBUG] train episode 483: reward = 35.00, steps = 35\n",
      "22:54:06 [DEBUG] train episode 484: reward = 70.00, steps = 70\n",
      "22:54:06 [DEBUG] train episode 485: reward = 103.00, steps = 103\n",
      "22:54:06 [DEBUG] train episode 486: reward = 32.00, steps = 32\n",
      "22:54:06 [DEBUG] train episode 487: reward = 44.00, steps = 44\n",
      "22:54:06 [DEBUG] train episode 488: reward = 77.00, steps = 77\n",
      "22:54:06 [DEBUG] train episode 489: reward = 14.00, steps = 14\n",
      "22:54:06 [DEBUG] train episode 490: reward = 69.00, steps = 69\n",
      "22:54:06 [DEBUG] train episode 491: reward = 200.00, steps = 200\n",
      "22:54:06 [DEBUG] train episode 492: reward = 61.00, steps = 61\n",
      "22:54:06 [DEBUG] train episode 493: reward = 38.00, steps = 38\n",
      "22:54:06 [DEBUG] train episode 494: reward = 27.00, steps = 27\n",
      "22:54:06 [DEBUG] train episode 495: reward = 80.00, steps = 80\n",
      "22:54:06 [DEBUG] train episode 496: reward = 61.00, steps = 61\n",
      "22:54:07 [DEBUG] train episode 497: reward = 169.00, steps = 169\n",
      "22:54:07 [DEBUG] train episode 498: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 499: reward = 40.00, steps = 40\n",
      "22:54:07 [DEBUG] train episode 500: reward = 130.00, steps = 130\n",
      "22:54:07 [DEBUG] train episode 501: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 502: reward = 75.00, steps = 75\n",
      "22:54:07 [DEBUG] train episode 503: reward = 53.00, steps = 53\n",
      "22:54:07 [DEBUG] train episode 504: reward = 26.00, steps = 26\n",
      "22:54:07 [DEBUG] train episode 505: reward = 133.00, steps = 133\n",
      "22:54:07 [DEBUG] train episode 506: reward = 122.00, steps = 122\n",
      "22:54:07 [DEBUG] train episode 507: reward = 49.00, steps = 49\n",
      "22:54:07 [DEBUG] train episode 508: reward = 138.00, steps = 138\n",
      "22:54:07 [DEBUG] train episode 509: reward = 73.00, steps = 73\n",
      "22:54:07 [DEBUG] train episode 510: reward = 115.00, steps = 115\n",
      "22:54:07 [DEBUG] train episode 511: reward = 200.00, steps = 200\n",
      "22:54:07 [DEBUG] train episode 512: reward = 121.00, steps = 121\n",
      "22:54:07 [DEBUG] train episode 513: reward = 132.00, steps = 132\n",
      "22:54:07 [DEBUG] train episode 514: reward = 65.00, steps = 65\n",
      "22:54:07 [DEBUG] train episode 515: reward = 16.00, steps = 16\n",
      "22:54:07 [DEBUG] train episode 516: reward = 108.00, steps = 108\n",
      "22:54:07 [DEBUG] train episode 517: reward = 40.00, steps = 40\n",
      "22:54:07 [DEBUG] train episode 518: reward = 34.00, steps = 34\n",
      "22:54:07 [DEBUG] train episode 519: reward = 70.00, steps = 70\n",
      "22:54:07 [DEBUG] train episode 520: reward = 101.00, steps = 101\n",
      "22:54:07 [DEBUG] train episode 521: reward = 17.00, steps = 17\n",
      "22:54:07 [DEBUG] train episode 522: reward = 65.00, steps = 65\n",
      "22:54:07 [DEBUG] train episode 523: reward = 106.00, steps = 106\n",
      "22:54:07 [DEBUG] train episode 524: reward = 21.00, steps = 21\n",
      "22:54:07 [DEBUG] train episode 525: reward = 101.00, steps = 101\n",
      "22:54:07 [DEBUG] train episode 526: reward = 103.00, steps = 103\n",
      "22:54:08 [DEBUG] train episode 527: reward = 183.00, steps = 183\n",
      "22:54:08 [DEBUG] train episode 528: reward = 58.00, steps = 58\n",
      "22:54:08 [DEBUG] train episode 529: reward = 34.00, steps = 34\n",
      "22:54:08 [DEBUG] train episode 530: reward = 142.00, steps = 142\n",
      "22:54:08 [DEBUG] train episode 531: reward = 172.00, steps = 172\n",
      "22:54:08 [DEBUG] train episode 532: reward = 110.00, steps = 110\n",
      "22:54:08 [DEBUG] train episode 533: reward = 60.00, steps = 60\n",
      "22:54:08 [DEBUG] train episode 534: reward = 22.00, steps = 22\n",
      "22:54:08 [DEBUG] train episode 535: reward = 118.00, steps = 118\n",
      "22:54:08 [DEBUG] train episode 536: reward = 162.00, steps = 162\n",
      "22:54:08 [DEBUG] train episode 537: reward = 126.00, steps = 126\n",
      "22:54:08 [DEBUG] train episode 538: reward = 47.00, steps = 47\n",
      "22:54:08 [DEBUG] train episode 539: reward = 10.00, steps = 10\n",
      "22:54:08 [DEBUG] train episode 540: reward = 103.00, steps = 103\n",
      "22:54:08 [DEBUG] train episode 541: reward = 59.00, steps = 59\n",
      "22:54:08 [DEBUG] train episode 542: reward = 48.00, steps = 48\n",
      "22:54:08 [DEBUG] train episode 543: reward = 66.00, steps = 66\n",
      "22:54:08 [DEBUG] train episode 544: reward = 149.00, steps = 149\n",
      "22:54:08 [DEBUG] train episode 545: reward = 80.00, steps = 80\n",
      "22:54:08 [DEBUG] train episode 546: reward = 134.00, steps = 134\n",
      "22:54:08 [DEBUG] train episode 547: reward = 111.00, steps = 111\n",
      "22:54:08 [DEBUG] train episode 548: reward = 40.00, steps = 40\n",
      "22:54:08 [DEBUG] train episode 549: reward = 141.00, steps = 141\n",
      "22:54:08 [DEBUG] train episode 550: reward = 111.00, steps = 111\n",
      "22:54:08 [DEBUG] train episode 551: reward = 57.00, steps = 57\n",
      "22:54:08 [DEBUG] train episode 552: reward = 52.00, steps = 52\n",
      "22:54:08 [DEBUG] train episode 553: reward = 90.00, steps = 90\n",
      "22:54:08 [DEBUG] train episode 554: reward = 200.00, steps = 200\n",
      "22:54:08 [DEBUG] train episode 555: reward = 58.00, steps = 58\n",
      "22:54:08 [DEBUG] train episode 556: reward = 60.00, steps = 60\n",
      "22:54:09 [DEBUG] train episode 557: reward = 178.00, steps = 178\n",
      "22:54:09 [DEBUG] train episode 558: reward = 119.00, steps = 119\n",
      "22:54:09 [DEBUG] train episode 559: reward = 17.00, steps = 17\n",
      "22:54:09 [DEBUG] train episode 560: reward = 73.00, steps = 73\n",
      "22:54:09 [DEBUG] train episode 561: reward = 44.00, steps = 44\n",
      "22:54:09 [DEBUG] train episode 562: reward = 177.00, steps = 177\n",
      "22:54:09 [DEBUG] train episode 563: reward = 148.00, steps = 148\n",
      "22:54:09 [DEBUG] train episode 564: reward = 160.00, steps = 160\n",
      "22:54:09 [DEBUG] train episode 565: reward = 50.00, steps = 50\n",
      "22:54:09 [DEBUG] train episode 566: reward = 30.00, steps = 30\n",
      "22:54:09 [DEBUG] train episode 567: reward = 138.00, steps = 138\n",
      "22:54:09 [DEBUG] train episode 568: reward = 44.00, steps = 44\n",
      "22:54:09 [DEBUG] train episode 569: reward = 69.00, steps = 69\n",
      "22:54:09 [DEBUG] train episode 570: reward = 166.00, steps = 166\n",
      "22:54:09 [DEBUG] train episode 571: reward = 70.00, steps = 70\n",
      "22:54:09 [DEBUG] train episode 572: reward = 111.00, steps = 111\n",
      "22:54:09 [DEBUG] train episode 573: reward = 91.00, steps = 91\n",
      "22:54:09 [DEBUG] train episode 574: reward = 133.00, steps = 133\n",
      "22:54:09 [DEBUG] train episode 575: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 576: reward = 19.00, steps = 19\n",
      "22:54:09 [DEBUG] train episode 577: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 578: reward = 139.00, steps = 139\n",
      "22:54:09 [DEBUG] train episode 579: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 580: reward = 14.00, steps = 14\n",
      "22:54:09 [DEBUG] train episode 581: reward = 200.00, steps = 200\n",
      "22:54:09 [DEBUG] train episode 582: reward = 58.00, steps = 58\n",
      "22:54:09 [DEBUG] train episode 583: reward = 140.00, steps = 140\n",
      "22:54:09 [DEBUG] train episode 584: reward = 24.00, steps = 24\n",
      "22:54:10 [DEBUG] train episode 585: reward = 145.00, steps = 145\n",
      "22:54:10 [DEBUG] train episode 586: reward = 26.00, steps = 26\n",
      "22:54:10 [DEBUG] train episode 587: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 588: reward = 69.00, steps = 69\n",
      "22:54:10 [DEBUG] train episode 589: reward = 153.00, steps = 153\n",
      "22:54:10 [DEBUG] train episode 590: reward = 48.00, steps = 48\n",
      "22:54:10 [DEBUG] train episode 591: reward = 194.00, steps = 194\n",
      "22:54:10 [DEBUG] train episode 592: reward = 87.00, steps = 87\n",
      "22:54:10 [DEBUG] train episode 593: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 594: reward = 164.00, steps = 164\n",
      "22:54:10 [DEBUG] train episode 595: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 596: reward = 67.00, steps = 67\n",
      "22:54:10 [DEBUG] train episode 597: reward = 34.00, steps = 34\n",
      "22:54:10 [DEBUG] train episode 598: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 599: reward = 50.00, steps = 50\n",
      "22:54:10 [DEBUG] train episode 600: reward = 120.00, steps = 120\n",
      "22:54:10 [DEBUG] train episode 601: reward = 17.00, steps = 17\n",
      "22:54:10 [DEBUG] train episode 602: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 603: reward = 200.00, steps = 200\n",
      "22:54:10 [DEBUG] train episode 604: reward = 24.00, steps = 24\n",
      "22:54:10 [DEBUG] train episode 605: reward = 61.00, steps = 61\n",
      "22:54:10 [DEBUG] train episode 606: reward = 21.00, steps = 21\n",
      "22:54:10 [DEBUG] train episode 607: reward = 184.00, steps = 184\n",
      "22:54:10 [DEBUG] train episode 608: reward = 160.00, steps = 160\n",
      "22:54:10 [DEBUG] train episode 609: reward = 72.00, steps = 72\n",
      "22:54:10 [DEBUG] train episode 610: reward = 123.00, steps = 123\n",
      "22:54:11 [DEBUG] train episode 611: reward = 106.00, steps = 106\n",
      "22:54:11 [DEBUG] train episode 612: reward = 13.00, steps = 13\n",
      "22:54:11 [DEBUG] train episode 613: reward = 174.00, steps = 174\n",
      "22:54:11 [DEBUG] train episode 614: reward = 142.00, steps = 142\n",
      "22:54:11 [DEBUG] train episode 615: reward = 100.00, steps = 100\n",
      "22:54:11 [DEBUG] train episode 616: reward = 138.00, steps = 138\n",
      "22:54:11 [DEBUG] train episode 617: reward = 57.00, steps = 57\n",
      "22:54:11 [DEBUG] train episode 618: reward = 67.00, steps = 67\n",
      "22:54:11 [DEBUG] train episode 619: reward = 149.00, steps = 149\n",
      "22:54:11 [DEBUG] train episode 620: reward = 100.00, steps = 100\n",
      "22:54:11 [DEBUG] train episode 621: reward = 88.00, steps = 88\n",
      "22:54:11 [DEBUG] train episode 622: reward = 60.00, steps = 60\n",
      "22:54:11 [DEBUG] train episode 623: reward = 57.00, steps = 57\n",
      "22:54:11 [DEBUG] train episode 624: reward = 58.00, steps = 58\n",
      "22:54:11 [DEBUG] train episode 625: reward = 155.00, steps = 155\n",
      "22:54:11 [DEBUG] train episode 626: reward = 191.00, steps = 191\n",
      "22:54:11 [DEBUG] train episode 627: reward = 28.00, steps = 28\n",
      "22:54:11 [DEBUG] train episode 628: reward = 192.00, steps = 192\n",
      "22:54:11 [DEBUG] train episode 629: reward = 53.00, steps = 53\n",
      "22:54:11 [DEBUG] train episode 630: reward = 167.00, steps = 167\n",
      "22:54:11 [DEBUG] train episode 631: reward = 16.00, steps = 16\n",
      "22:54:11 [DEBUG] train episode 632: reward = 72.00, steps = 72\n",
      "22:54:11 [DEBUG] train episode 633: reward = 145.00, steps = 145\n",
      "22:54:11 [DEBUG] train episode 634: reward = 51.00, steps = 51\n",
      "22:54:11 [DEBUG] train episode 635: reward = 92.00, steps = 92\n",
      "22:54:11 [DEBUG] train episode 636: reward = 168.00, steps = 168\n",
      "22:54:11 [DEBUG] train episode 637: reward = 98.00, steps = 98\n",
      "22:54:11 [DEBUG] train episode 638: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 639: reward = 49.00, steps = 49\n",
      "22:54:12 [DEBUG] train episode 640: reward = 125.00, steps = 125\n",
      "22:54:12 [DEBUG] train episode 641: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 642: reward = 175.00, steps = 175\n",
      "22:54:12 [DEBUG] train episode 643: reward = 73.00, steps = 73\n",
      "22:54:12 [DEBUG] train episode 644: reward = 64.00, steps = 64\n",
      "22:54:12 [DEBUG] train episode 645: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 646: reward = 54.00, steps = 54\n",
      "22:54:12 [DEBUG] train episode 647: reward = 144.00, steps = 144\n",
      "22:54:12 [DEBUG] train episode 648: reward = 166.00, steps = 166\n",
      "22:54:12 [DEBUG] train episode 649: reward = 89.00, steps = 89\n",
      "22:54:12 [DEBUG] train episode 650: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 651: reward = 164.00, steps = 164\n",
      "22:54:12 [DEBUG] train episode 652: reward = 200.00, steps = 200\n",
      "22:54:12 [DEBUG] train episode 653: reward = 49.00, steps = 49\n",
      "22:54:12 [DEBUG] train episode 654: reward = 178.00, steps = 178\n",
      "22:54:12 [DEBUG] train episode 655: reward = 16.00, steps = 16\n",
      "22:54:12 [DEBUG] train episode 656: reward = 146.00, steps = 146\n",
      "22:54:12 [DEBUG] train episode 657: reward = 164.00, steps = 164\n",
      "22:54:12 [DEBUG] train episode 658: reward = 180.00, steps = 180\n",
      "22:54:13 [DEBUG] train episode 659: reward = 143.00, steps = 143\n",
      "22:54:13 [DEBUG] train episode 660: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 661: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 662: reward = 148.00, steps = 148\n",
      "22:54:13 [DEBUG] train episode 663: reward = 68.00, steps = 68\n",
      "22:54:13 [DEBUG] train episode 664: reward = 63.00, steps = 63\n",
      "22:54:13 [DEBUG] train episode 665: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 666: reward = 52.00, steps = 52\n",
      "22:54:13 [DEBUG] train episode 667: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 668: reward = 32.00, steps = 32\n",
      "22:54:13 [DEBUG] train episode 669: reward = 58.00, steps = 58\n",
      "22:54:13 [DEBUG] train episode 670: reward = 100.00, steps = 100\n",
      "22:54:13 [DEBUG] train episode 671: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 672: reward = 173.00, steps = 173\n",
      "22:54:13 [DEBUG] train episode 673: reward = 89.00, steps = 89\n",
      "22:54:13 [DEBUG] train episode 674: reward = 69.00, steps = 69\n",
      "22:54:13 [DEBUG] train episode 675: reward = 44.00, steps = 44\n",
      "22:54:13 [DEBUG] train episode 676: reward = 20.00, steps = 20\n",
      "22:54:13 [DEBUG] train episode 677: reward = 17.00, steps = 17\n",
      "22:54:13 [DEBUG] train episode 678: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 679: reward = 71.00, steps = 71\n",
      "22:54:13 [DEBUG] train episode 680: reward = 200.00, steps = 200\n",
      "22:54:13 [DEBUG] train episode 681: reward = 198.00, steps = 198\n",
      "22:54:13 [DEBUG] train episode 682: reward = 82.00, steps = 82\n",
      "22:54:13 [DEBUG] train episode 683: reward = 122.00, steps = 122\n",
      "22:54:14 [DEBUG] train episode 684: reward = 160.00, steps = 160\n",
      "22:54:14 [DEBUG] train episode 685: reward = 147.00, steps = 147\n",
      "22:54:14 [DEBUG] train episode 686: reward = 102.00, steps = 102\n",
      "22:54:14 [DEBUG] train episode 687: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 688: reward = 106.00, steps = 106\n",
      "22:54:14 [DEBUG] train episode 689: reward = 58.00, steps = 58\n",
      "22:54:14 [DEBUG] train episode 690: reward = 175.00, steps = 175\n",
      "22:54:14 [DEBUG] train episode 691: reward = 189.00, steps = 189\n",
      "22:54:14 [DEBUG] train episode 692: reward = 30.00, steps = 30\n",
      "22:54:14 [DEBUG] train episode 693: reward = 80.00, steps = 80\n",
      "22:54:14 [DEBUG] train episode 694: reward = 36.00, steps = 36\n",
      "22:54:14 [DEBUG] train episode 695: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 696: reward = 62.00, steps = 62\n",
      "22:54:14 [DEBUG] train episode 697: reward = 75.00, steps = 75\n",
      "22:54:14 [DEBUG] train episode 698: reward = 168.00, steps = 168\n",
      "22:54:14 [DEBUG] train episode 699: reward = 111.00, steps = 111\n",
      "22:54:14 [DEBUG] train episode 700: reward = 154.00, steps = 154\n",
      "22:54:14 [DEBUG] train episode 701: reward = 115.00, steps = 115\n",
      "22:54:14 [DEBUG] train episode 702: reward = 180.00, steps = 180\n",
      "22:54:14 [DEBUG] train episode 703: reward = 110.00, steps = 110\n",
      "22:54:14 [DEBUG] train episode 704: reward = 200.00, steps = 200\n",
      "22:54:14 [DEBUG] train episode 705: reward = 34.00, steps = 34\n",
      "22:54:14 [DEBUG] train episode 706: reward = 76.00, steps = 76\n",
      "22:54:15 [DEBUG] train episode 707: reward = 172.00, steps = 172\n",
      "22:54:15 [DEBUG] train episode 708: reward = 55.00, steps = 55\n",
      "22:54:15 [DEBUG] train episode 709: reward = 97.00, steps = 97\n",
      "22:54:15 [DEBUG] train episode 710: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 711: reward = 101.00, steps = 101\n",
      "22:54:15 [DEBUG] train episode 712: reward = 139.00, steps = 139\n",
      "22:54:15 [DEBUG] train episode 713: reward = 65.00, steps = 65\n",
      "22:54:15 [DEBUG] train episode 714: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 715: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 716: reward = 171.00, steps = 171\n",
      "22:54:15 [DEBUG] train episode 717: reward = 139.00, steps = 139\n",
      "22:54:15 [DEBUG] train episode 718: reward = 200.00, steps = 200\n",
      "22:54:15 [DEBUG] train episode 719: reward = 51.00, steps = 51\n",
      "22:54:15 [DEBUG] train episode 720: reward = 159.00, steps = 159\n",
      "22:54:15 [DEBUG] train episode 721: reward = 105.00, steps = 105\n",
      "22:54:15 [DEBUG] train episode 722: reward = 69.00, steps = 69\n",
      "22:54:15 [DEBUG] train episode 723: reward = 51.00, steps = 51\n",
      "22:54:15 [DEBUG] train episode 724: reward = 104.00, steps = 104\n",
      "22:54:15 [DEBUG] train episode 725: reward = 35.00, steps = 35\n",
      "22:54:15 [DEBUG] train episode 726: reward = 49.00, steps = 49\n",
      "22:54:16 [DEBUG] train episode 727: reward = 176.00, steps = 176\n",
      "22:54:16 [DEBUG] train episode 728: reward = 97.00, steps = 97\n",
      "22:54:16 [DEBUG] train episode 729: reward = 15.00, steps = 15\n",
      "22:54:16 [DEBUG] train episode 730: reward = 113.00, steps = 113\n",
      "22:54:16 [DEBUG] train episode 731: reward = 200.00, steps = 200\n",
      "22:54:16 [DEBUG] train episode 732: reward = 70.00, steps = 70\n",
      "22:54:16 [DEBUG] train episode 733: reward = 145.00, steps = 145\n",
      "22:54:16 [DEBUG] train episode 734: reward = 71.00, steps = 71\n",
      "22:54:16 [DEBUG] train episode 735: reward = 168.00, steps = 168\n",
      "22:54:16 [DEBUG] train episode 736: reward = 178.00, steps = 178\n",
      "22:54:16 [DEBUG] train episode 737: reward = 108.00, steps = 108\n",
      "22:54:16 [DEBUG] train episode 738: reward = 137.00, steps = 137\n",
      "22:54:16 [DEBUG] train episode 739: reward = 161.00, steps = 161\n",
      "22:54:16 [DEBUG] train episode 740: reward = 160.00, steps = 160\n",
      "22:54:16 [DEBUG] train episode 741: reward = 35.00, steps = 35\n",
      "22:54:16 [DEBUG] train episode 742: reward = 200.00, steps = 200\n",
      "22:54:16 [DEBUG] train episode 743: reward = 65.00, steps = 65\n",
      "22:54:16 [DEBUG] train episode 744: reward = 156.00, steps = 156\n",
      "22:54:16 [DEBUG] train episode 745: reward = 13.00, steps = 13\n",
      "22:54:16 [DEBUG] train episode 746: reward = 93.00, steps = 93\n",
      "22:54:17 [DEBUG] train episode 747: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 748: reward = 148.00, steps = 148\n",
      "22:54:17 [DEBUG] train episode 749: reward = 172.00, steps = 172\n",
      "22:54:17 [DEBUG] train episode 750: reward = 96.00, steps = 96\n",
      "22:54:17 [DEBUG] train episode 751: reward = 154.00, steps = 154\n",
      "22:54:17 [DEBUG] train episode 752: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 753: reward = 37.00, steps = 37\n",
      "22:54:17 [DEBUG] train episode 754: reward = 127.00, steps = 127\n",
      "22:54:17 [DEBUG] train episode 755: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 756: reward = 200.00, steps = 200\n",
      "22:54:17 [DEBUG] train episode 757: reward = 33.00, steps = 33\n",
      "22:54:17 [DEBUG] train episode 758: reward = 108.00, steps = 108\n",
      "22:54:17 [DEBUG] train episode 759: reward = 72.00, steps = 72\n",
      "22:54:17 [DEBUG] train episode 760: reward = 156.00, steps = 156\n",
      "22:54:17 [DEBUG] train episode 761: reward = 149.00, steps = 149\n",
      "22:54:17 [DEBUG] train episode 762: reward = 23.00, steps = 23\n",
      "22:54:17 [DEBUG] train episode 763: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 764: reward = 155.00, steps = 155\n",
      "22:54:18 [DEBUG] train episode 765: reward = 110.00, steps = 110\n",
      "22:54:18 [DEBUG] train episode 766: reward = 181.00, steps = 181\n",
      "22:54:18 [DEBUG] train episode 767: reward = 76.00, steps = 76\n",
      "22:54:18 [DEBUG] train episode 768: reward = 61.00, steps = 61\n",
      "22:54:18 [DEBUG] train episode 769: reward = 124.00, steps = 124\n",
      "22:54:18 [DEBUG] train episode 770: reward = 34.00, steps = 34\n",
      "22:54:18 [DEBUG] train episode 771: reward = 166.00, steps = 166\n",
      "22:54:18 [DEBUG] train episode 772: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 773: reward = 180.00, steps = 180\n",
      "22:54:18 [DEBUG] train episode 774: reward = 200.00, steps = 200\n",
      "22:54:18 [DEBUG] train episode 775: reward = 118.00, steps = 118\n",
      "22:54:18 [DEBUG] train episode 776: reward = 125.00, steps = 125\n",
      "22:54:18 [DEBUG] train episode 777: reward = 55.00, steps = 55\n",
      "22:54:18 [DEBUG] train episode 778: reward = 21.00, steps = 21\n",
      "22:54:18 [DEBUG] train episode 779: reward = 139.00, steps = 139\n",
      "22:54:18 [DEBUG] train episode 780: reward = 43.00, steps = 43\n",
      "22:54:19 [DEBUG] train episode 781: reward = 122.00, steps = 122\n",
      "22:54:19 [DEBUG] train episode 782: reward = 163.00, steps = 163\n",
      "22:54:19 [DEBUG] train episode 783: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 784: reward = 146.00, steps = 146\n",
      "22:54:19 [DEBUG] train episode 785: reward = 70.00, steps = 70\n",
      "22:54:19 [DEBUG] train episode 786: reward = 66.00, steps = 66\n",
      "22:54:19 [DEBUG] train episode 787: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 788: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 789: reward = 143.00, steps = 143\n",
      "22:54:19 [DEBUG] train episode 790: reward = 35.00, steps = 35\n",
      "22:54:19 [DEBUG] train episode 791: reward = 200.00, steps = 200\n",
      "22:54:19 [DEBUG] train episode 792: reward = 88.00, steps = 88\n",
      "22:54:19 [DEBUG] train episode 793: reward = 198.00, steps = 198\n",
      "22:54:19 [DEBUG] train episode 794: reward = 117.00, steps = 117\n",
      "22:54:19 [DEBUG] train episode 795: reward = 199.00, steps = 199\n",
      "22:54:20 [DEBUG] train episode 796: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 797: reward = 182.00, steps = 182\n",
      "22:54:20 [DEBUG] train episode 798: reward = 56.00, steps = 56\n",
      "22:54:20 [DEBUG] train episode 799: reward = 19.00, steps = 19\n",
      "22:54:20 [DEBUG] train episode 800: reward = 67.00, steps = 67\n",
      "22:54:20 [DEBUG] train episode 801: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 802: reward = 107.00, steps = 107\n",
      "22:54:20 [DEBUG] train episode 803: reward = 187.00, steps = 187\n",
      "22:54:20 [DEBUG] train episode 804: reward = 195.00, steps = 195\n",
      "22:54:20 [DEBUG] train episode 805: reward = 64.00, steps = 64\n",
      "22:54:20 [DEBUG] train episode 806: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 807: reward = 200.00, steps = 200\n",
      "22:54:20 [DEBUG] train episode 808: reward = 47.00, steps = 47\n",
      "22:54:20 [DEBUG] train episode 809: reward = 128.00, steps = 128\n",
      "22:54:20 [DEBUG] train episode 810: reward = 63.00, steps = 63\n",
      "22:54:20 [DEBUG] train episode 811: reward = 126.00, steps = 126\n",
      "22:54:20 [DEBUG] train episode 812: reward = 125.00, steps = 125\n",
      "22:54:20 [DEBUG] train episode 813: reward = 190.00, steps = 190\n",
      "22:54:20 [DEBUG] train episode 814: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 815: reward = 50.00, steps = 50\n",
      "22:54:21 [DEBUG] train episode 816: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 817: reward = 137.00, steps = 137\n",
      "22:54:21 [DEBUG] train episode 818: reward = 73.00, steps = 73\n",
      "22:54:21 [DEBUG] train episode 819: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 820: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 821: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 822: reward = 116.00, steps = 116\n",
      "22:54:21 [DEBUG] train episode 823: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 824: reward = 54.00, steps = 54\n",
      "22:54:21 [DEBUG] train episode 825: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 826: reward = 79.00, steps = 79\n",
      "22:54:21 [DEBUG] train episode 827: reward = 71.00, steps = 71\n",
      "22:54:21 [DEBUG] train episode 828: reward = 200.00, steps = 200\n",
      "22:54:21 [DEBUG] train episode 829: reward = 200.00, steps = 200\n",
      "22:54:22 [DEBUG] train episode 830: reward = 194.00, steps = 194\n",
      "22:54:22 [DEBUG] train episode 831: reward = 134.00, steps = 134\n",
      "22:54:22 [DEBUG] train episode 832: reward = 15.00, steps = 15\n",
      "22:54:22 [DEBUG] train episode 833: reward = 113.00, steps = 113\n",
      "22:54:22 [DEBUG] train episode 834: reward = 131.00, steps = 131\n",
      "22:54:22 [DEBUG] train episode 835: reward = 73.00, steps = 73\n",
      "22:54:22 [DEBUG] train episode 836: reward = 114.00, steps = 114\n",
      "22:54:22 [DEBUG] train episode 837: reward = 162.00, steps = 162\n",
      "22:54:22 [DEBUG] train episode 838: reward = 45.00, steps = 45\n",
      "22:54:22 [DEBUG] train episode 839: reward = 63.00, steps = 63\n",
      "22:54:22 [DEBUG] train episode 840: reward = 61.00, steps = 61\n",
      "22:54:22 [DEBUG] train episode 841: reward = 173.00, steps = 173\n",
      "22:54:22 [DEBUG] train episode 842: reward = 200.00, steps = 200\n",
      "22:54:22 [DEBUG] train episode 843: reward = 164.00, steps = 164\n",
      "22:54:22 [DEBUG] train episode 844: reward = 116.00, steps = 116\n",
      "22:54:22 [DEBUG] train episode 845: reward = 124.00, steps = 124\n",
      "22:54:22 [DEBUG] train episode 846: reward = 86.00, steps = 86\n",
      "22:54:22 [DEBUG] train episode 847: reward = 31.00, steps = 31\n",
      "22:54:22 [DEBUG] train episode 848: reward = 170.00, steps = 170\n",
      "22:54:22 [DEBUG] train episode 849: reward = 27.00, steps = 27\n",
      "22:54:23 [DEBUG] train episode 850: reward = 63.00, steps = 63\n",
      "22:54:23 [DEBUG] train episode 851: reward = 20.00, steps = 20\n",
      "22:54:23 [DEBUG] train episode 852: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 853: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 854: reward = 22.00, steps = 22\n",
      "22:54:23 [DEBUG] train episode 855: reward = 128.00, steps = 128\n",
      "22:54:23 [DEBUG] train episode 856: reward = 186.00, steps = 186\n",
      "22:54:23 [DEBUG] train episode 857: reward = 108.00, steps = 108\n",
      "22:54:23 [DEBUG] train episode 858: reward = 103.00, steps = 103\n",
      "22:54:23 [DEBUG] train episode 859: reward = 40.00, steps = 40\n",
      "22:54:23 [DEBUG] train episode 860: reward = 42.00, steps = 42\n",
      "22:54:23 [DEBUG] train episode 861: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 862: reward = 150.00, steps = 150\n",
      "22:54:23 [DEBUG] train episode 863: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 864: reward = 200.00, steps = 200\n",
      "22:54:23 [DEBUG] train episode 865: reward = 123.00, steps = 123\n",
      "22:54:23 [DEBUG] train episode 866: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 867: reward = 163.00, steps = 163\n",
      "22:54:24 [DEBUG] train episode 868: reward = 122.00, steps = 122\n",
      "22:54:24 [DEBUG] train episode 869: reward = 197.00, steps = 197\n",
      "22:54:24 [DEBUG] train episode 870: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 871: reward = 158.00, steps = 158\n",
      "22:54:24 [DEBUG] train episode 872: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 873: reward = 114.00, steps = 114\n",
      "22:54:24 [DEBUG] train episode 874: reward = 191.00, steps = 191\n",
      "22:54:24 [DEBUG] train episode 875: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 876: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 877: reward = 200.00, steps = 200\n",
      "22:54:24 [DEBUG] train episode 878: reward = 182.00, steps = 182\n",
      "22:54:24 [DEBUG] train episode 879: reward = 130.00, steps = 130\n",
      "22:54:25 [DEBUG] train episode 880: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 881: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 882: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 883: reward = 30.00, steps = 30\n",
      "22:54:25 [DEBUG] train episode 884: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 885: reward = 102.00, steps = 102\n",
      "22:54:25 [DEBUG] train episode 886: reward = 42.00, steps = 42\n",
      "22:54:25 [DEBUG] train episode 887: reward = 99.00, steps = 99\n",
      "22:54:25 [DEBUG] train episode 888: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 889: reward = 135.00, steps = 135\n",
      "22:54:25 [DEBUG] train episode 890: reward = 18.00, steps = 18\n",
      "22:54:25 [DEBUG] train episode 891: reward = 173.00, steps = 173\n",
      "22:54:25 [DEBUG] train episode 892: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 893: reward = 32.00, steps = 32\n",
      "22:54:25 [DEBUG] train episode 894: reward = 100.00, steps = 100\n",
      "22:54:25 [DEBUG] train episode 895: reward = 200.00, steps = 200\n",
      "22:54:25 [DEBUG] train episode 896: reward = 25.00, steps = 25\n",
      "22:54:25 [DEBUG] train episode 897: reward = 188.00, steps = 188\n",
      "22:54:26 [DEBUG] train episode 898: reward = 143.00, steps = 143\n",
      "22:54:26 [DEBUG] train episode 899: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 900: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 901: reward = 34.00, steps = 34\n",
      "22:54:26 [DEBUG] train episode 902: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 903: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 904: reward = 56.00, steps = 56\n",
      "22:54:26 [DEBUG] train episode 905: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 906: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 907: reward = 48.00, steps = 48\n",
      "22:54:26 [DEBUG] train episode 908: reward = 161.00, steps = 161\n",
      "22:54:26 [DEBUG] train episode 909: reward = 153.00, steps = 153\n",
      "22:54:26 [DEBUG] train episode 910: reward = 172.00, steps = 172\n",
      "22:54:26 [DEBUG] train episode 911: reward = 200.00, steps = 200\n",
      "22:54:26 [DEBUG] train episode 912: reward = 146.00, steps = 146\n",
      "22:54:26 [DEBUG] train episode 913: reward = 71.00, steps = 71\n",
      "22:54:26 [DEBUG] train episode 914: reward = 133.00, steps = 133\n",
      "22:54:26 [DEBUG] train episode 915: reward = 65.00, steps = 65\n",
      "22:54:26 [DEBUG] train episode 916: reward = 73.00, steps = 73\n",
      "22:54:27 [DEBUG] train episode 917: reward = 31.00, steps = 31\n",
      "22:54:27 [DEBUG] train episode 918: reward = 75.00, steps = 75\n",
      "22:54:27 [DEBUG] train episode 919: reward = 26.00, steps = 26\n",
      "22:54:27 [DEBUG] train episode 920: reward = 79.00, steps = 79\n",
      "22:54:27 [DEBUG] train episode 921: reward = 158.00, steps = 158\n",
      "22:54:27 [DEBUG] train episode 922: reward = 74.00, steps = 74\n",
      "22:54:27 [DEBUG] train episode 923: reward = 83.00, steps = 83\n",
      "22:54:27 [DEBUG] train episode 924: reward = 81.00, steps = 81\n",
      "22:54:27 [DEBUG] train episode 925: reward = 25.00, steps = 25\n",
      "22:54:27 [DEBUG] train episode 926: reward = 20.00, steps = 20\n",
      "22:54:27 [DEBUG] train episode 927: reward = 177.00, steps = 177\n",
      "22:54:27 [DEBUG] train episode 928: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 929: reward = 71.00, steps = 71\n",
      "22:54:27 [DEBUG] train episode 930: reward = 13.00, steps = 13\n",
      "22:54:27 [DEBUG] train episode 931: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 932: reward = 61.00, steps = 61\n",
      "22:54:27 [DEBUG] train episode 933: reward = 200.00, steps = 200\n",
      "22:54:27 [DEBUG] train episode 934: reward = 54.00, steps = 54\n",
      "22:54:27 [DEBUG] train episode 935: reward = 65.00, steps = 65\n",
      "22:54:27 [DEBUG] train episode 936: reward = 198.00, steps = 198\n",
      "22:54:27 [DEBUG] train episode 937: reward = 37.00, steps = 37\n",
      "22:54:27 [DEBUG] train episode 938: reward = 119.00, steps = 119\n",
      "22:54:27 [DEBUG] train episode 939: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 940: reward = 185.00, steps = 185\n",
      "22:54:28 [DEBUG] train episode 941: reward = 128.00, steps = 128\n",
      "22:54:28 [DEBUG] train episode 942: reward = 40.00, steps = 40\n",
      "22:54:28 [DEBUG] train episode 943: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 944: reward = 59.00, steps = 59\n",
      "22:54:28 [DEBUG] train episode 945: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 946: reward = 140.00, steps = 140\n",
      "22:54:28 [DEBUG] train episode 947: reward = 25.00, steps = 25\n",
      "22:54:28 [DEBUG] train episode 948: reward = 100.00, steps = 100\n",
      "22:54:28 [DEBUG] train episode 949: reward = 22.00, steps = 22\n",
      "22:54:28 [DEBUG] train episode 950: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 951: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 952: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 953: reward = 166.00, steps = 166\n",
      "22:54:28 [DEBUG] train episode 954: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 955: reward = 169.00, steps = 169\n",
      "22:54:28 [DEBUG] train episode 956: reward = 114.00, steps = 114\n",
      "22:54:28 [DEBUG] train episode 957: reward = 200.00, steps = 200\n",
      "22:54:28 [DEBUG] train episode 958: reward = 103.00, steps = 103\n",
      "22:54:29 [DEBUG] train episode 959: reward = 176.00, steps = 176\n",
      "22:54:29 [DEBUG] train episode 960: reward = 155.00, steps = 155\n",
      "22:54:29 [DEBUG] train episode 961: reward = 126.00, steps = 126\n",
      "22:54:29 [DEBUG] train episode 962: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 963: reward = 120.00, steps = 120\n",
      "22:54:29 [DEBUG] train episode 964: reward = 159.00, steps = 159\n",
      "22:54:29 [DEBUG] train episode 965: reward = 156.00, steps = 156\n",
      "22:54:29 [DEBUG] train episode 966: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 967: reward = 91.00, steps = 91\n",
      "22:54:29 [DEBUG] train episode 968: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 969: reward = 162.00, steps = 162\n",
      "22:54:29 [DEBUG] train episode 970: reward = 189.00, steps = 189\n",
      "22:54:29 [DEBUG] train episode 971: reward = 64.00, steps = 64\n",
      "22:54:29 [DEBUG] train episode 972: reward = 123.00, steps = 123\n",
      "22:54:29 [DEBUG] train episode 973: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 974: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 975: reward = 58.00, steps = 58\n",
      "22:54:29 [DEBUG] train episode 976: reward = 133.00, steps = 133\n",
      "22:54:29 [DEBUG] train episode 977: reward = 100.00, steps = 100\n",
      "22:54:29 [DEBUG] train episode 978: reward = 200.00, steps = 200\n",
      "22:54:29 [DEBUG] train episode 979: reward = 106.00, steps = 106\n",
      "22:54:30 [DEBUG] train episode 980: reward = 167.00, steps = 167\n",
      "22:54:30 [DEBUG] train episode 981: reward = 116.00, steps = 116\n",
      "22:54:30 [DEBUG] train episode 982: reward = 158.00, steps = 158\n",
      "22:54:30 [DEBUG] train episode 983: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 984: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 985: reward = 112.00, steps = 112\n",
      "22:54:30 [DEBUG] train episode 986: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 987: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 988: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 989: reward = 148.00, steps = 148\n",
      "22:54:30 [DEBUG] train episode 990: reward = 133.00, steps = 133\n",
      "22:54:30 [DEBUG] train episode 991: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 992: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 993: reward = 200.00, steps = 200\n",
      "22:54:30 [DEBUG] train episode 994: reward = 60.00, steps = 60\n",
      "22:54:30 [DEBUG] train episode 995: reward = 188.00, steps = 188\n",
      "22:54:30 [DEBUG] train episode 996: reward = 120.00, steps = 120\n",
      "22:54:30 [DEBUG] train episode 997: reward = 70.00, steps = 70\n",
      "22:54:30 [DEBUG] train episode 998: reward = 70.00, steps = 70\n",
      "22:54:31 [DEBUG] train episode 999: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1000: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1001: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1002: reward = 181.00, steps = 181\n",
      "22:54:31 [DEBUG] train episode 1003: reward = 34.00, steps = 34\n",
      "22:54:31 [DEBUG] train episode 1004: reward = 177.00, steps = 177\n",
      "22:54:31 [DEBUG] train episode 1005: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1006: reward = 97.00, steps = 97\n",
      "22:54:31 [DEBUG] train episode 1007: reward = 155.00, steps = 155\n",
      "22:54:31 [DEBUG] train episode 1008: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1009: reward = 163.00, steps = 163\n",
      "22:54:31 [DEBUG] train episode 1010: reward = 56.00, steps = 56\n",
      "22:54:31 [DEBUG] train episode 1011: reward = 168.00, steps = 168\n",
      "22:54:31 [DEBUG] train episode 1012: reward = 78.00, steps = 78\n",
      "22:54:31 [DEBUG] train episode 1013: reward = 149.00, steps = 149\n",
      "22:54:31 [DEBUG] train episode 1014: reward = 24.00, steps = 24\n",
      "22:54:31 [DEBUG] train episode 1015: reward = 48.00, steps = 48\n",
      "22:54:31 [DEBUG] train episode 1016: reward = 200.00, steps = 200\n",
      "22:54:31 [DEBUG] train episode 1017: reward = 189.00, steps = 189\n",
      "22:54:31 [DEBUG] train episode 1018: reward = 151.00, steps = 151\n",
      "22:54:31 [DEBUG] train episode 1019: reward = 84.00, steps = 84\n",
      "22:54:31 [DEBUG] train episode 1020: reward = 114.00, steps = 114\n",
      "22:54:32 [DEBUG] train episode 1021: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1022: reward = 194.00, steps = 194\n",
      "22:54:32 [DEBUG] train episode 1023: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1024: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1025: reward = 55.00, steps = 55\n",
      "22:54:32 [DEBUG] train episode 1026: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1027: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1028: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1029: reward = 130.00, steps = 130\n",
      "22:54:32 [DEBUG] train episode 1030: reward = 78.00, steps = 78\n",
      "22:54:32 [DEBUG] train episode 1031: reward = 50.00, steps = 50\n",
      "22:54:32 [DEBUG] train episode 1032: reward = 129.00, steps = 129\n",
      "22:54:32 [DEBUG] train episode 1033: reward = 190.00, steps = 190\n",
      "22:54:32 [DEBUG] train episode 1034: reward = 164.00, steps = 164\n",
      "22:54:32 [DEBUG] train episode 1035: reward = 190.00, steps = 190\n",
      "22:54:32 [DEBUG] train episode 1036: reward = 66.00, steps = 66\n",
      "22:54:32 [DEBUG] train episode 1037: reward = 200.00, steps = 200\n",
      "22:54:32 [DEBUG] train episode 1038: reward = 15.00, steps = 15\n",
      "22:54:32 [DEBUG] train episode 1039: reward = 29.00, steps = 29\n",
      "22:54:32 [DEBUG] train episode 1040: reward = 149.00, steps = 149\n",
      "22:54:32 [DEBUG] train episode 1041: reward = 146.00, steps = 146\n",
      "22:54:32 [DEBUG] train episode 1042: reward = 75.00, steps = 75\n",
      "22:54:33 [DEBUG] train episode 1043: reward = 145.00, steps = 145\n",
      "22:54:33 [DEBUG] train episode 1044: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1045: reward = 93.00, steps = 93\n",
      "22:54:33 [DEBUG] train episode 1046: reward = 160.00, steps = 160\n",
      "22:54:33 [DEBUG] train episode 1047: reward = 189.00, steps = 189\n",
      "22:54:33 [DEBUG] train episode 1048: reward = 36.00, steps = 36\n",
      "22:54:33 [DEBUG] train episode 1049: reward = 156.00, steps = 156\n",
      "22:54:33 [DEBUG] train episode 1050: reward = 127.00, steps = 127\n",
      "22:54:33 [DEBUG] train episode 1051: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1052: reward = 80.00, steps = 80\n",
      "22:54:33 [DEBUG] train episode 1053: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1054: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1055: reward = 65.00, steps = 65\n",
      "22:54:33 [DEBUG] train episode 1056: reward = 169.00, steps = 169\n",
      "22:54:33 [DEBUG] train episode 1057: reward = 63.00, steps = 63\n",
      "22:54:33 [DEBUG] train episode 1058: reward = 200.00, steps = 200\n",
      "22:54:33 [DEBUG] train episode 1059: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1060: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1061: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1062: reward = 129.00, steps = 129\n",
      "22:54:34 [DEBUG] train episode 1063: reward = 112.00, steps = 112\n",
      "22:54:34 [DEBUG] train episode 1064: reward = 147.00, steps = 147\n",
      "22:54:34 [DEBUG] train episode 1065: reward = 178.00, steps = 178\n",
      "22:54:34 [DEBUG] train episode 1066: reward = 70.00, steps = 70\n",
      "22:54:34 [DEBUG] train episode 1067: reward = 160.00, steps = 160\n",
      "22:54:34 [DEBUG] train episode 1068: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1069: reward = 200.00, steps = 200\n",
      "22:54:34 [DEBUG] train episode 1070: reward = 47.00, steps = 47\n",
      "22:54:34 [DEBUG] train episode 1071: reward = 125.00, steps = 125\n",
      "22:54:34 [DEBUG] train episode 1072: reward = 88.00, steps = 88\n",
      "22:54:34 [DEBUG] train episode 1073: reward = 153.00, steps = 153\n",
      "22:54:34 [DEBUG] train episode 1074: reward = 143.00, steps = 143\n",
      "22:54:34 [DEBUG] train episode 1075: reward = 153.00, steps = 153\n",
      "22:54:34 [DEBUG] train episode 1076: reward = 143.00, steps = 143\n",
      "22:54:35 [DEBUG] train episode 1077: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1078: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1079: reward = 66.00, steps = 66\n",
      "22:54:35 [DEBUG] train episode 1080: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1081: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1082: reward = 52.00, steps = 52\n",
      "22:54:35 [DEBUG] train episode 1083: reward = 192.00, steps = 192\n",
      "22:54:35 [DEBUG] train episode 1084: reward = 115.00, steps = 115\n",
      "22:54:35 [DEBUG] train episode 1085: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1086: reward = 75.00, steps = 75\n",
      "22:54:35 [DEBUG] train episode 1087: reward = 164.00, steps = 164\n",
      "22:54:35 [DEBUG] train episode 1088: reward = 135.00, steps = 135\n",
      "22:54:35 [DEBUG] train episode 1089: reward = 42.00, steps = 42\n",
      "22:54:35 [DEBUG] train episode 1090: reward = 162.00, steps = 162\n",
      "22:54:35 [DEBUG] train episode 1091: reward = 139.00, steps = 139\n",
      "22:54:35 [DEBUG] train episode 1092: reward = 200.00, steps = 200\n",
      "22:54:35 [DEBUG] train episode 1093: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1094: reward = 169.00, steps = 169\n",
      "22:54:36 [DEBUG] train episode 1095: reward = 165.00, steps = 165\n",
      "22:54:36 [DEBUG] train episode 1096: reward = 182.00, steps = 182\n",
      "22:54:36 [DEBUG] train episode 1097: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1098: reward = 94.00, steps = 94\n",
      "22:54:36 [DEBUG] train episode 1099: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1100: reward = 96.00, steps = 96\n",
      "22:54:36 [DEBUG] train episode 1101: reward = 16.00, steps = 16\n",
      "22:54:36 [DEBUG] train episode 1102: reward = 144.00, steps = 144\n",
      "22:54:36 [DEBUG] train episode 1103: reward = 59.00, steps = 59\n",
      "22:54:36 [DEBUG] train episode 1104: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1105: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1106: reward = 200.00, steps = 200\n",
      "22:54:36 [DEBUG] train episode 1107: reward = 163.00, steps = 163\n",
      "22:54:36 [DEBUG] train episode 1108: reward = 169.00, steps = 169\n",
      "22:54:36 [DEBUG] train episode 1109: reward = 119.00, steps = 119\n",
      "22:54:37 [DEBUG] train episode 1110: reward = 171.00, steps = 171\n",
      "22:54:37 [DEBUG] train episode 1111: reward = 166.00, steps = 166\n",
      "22:54:37 [DEBUG] train episode 1112: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1113: reward = 168.00, steps = 168\n",
      "22:54:37 [DEBUG] train episode 1114: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1115: reward = 75.00, steps = 75\n",
      "22:54:37 [DEBUG] train episode 1116: reward = 35.00, steps = 35\n",
      "22:54:37 [DEBUG] train episode 1117: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1118: reward = 80.00, steps = 80\n",
      "22:54:37 [DEBUG] train episode 1119: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1120: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1121: reward = 71.00, steps = 71\n",
      "22:54:37 [DEBUG] train episode 1122: reward = 20.00, steps = 20\n",
      "22:54:37 [DEBUG] train episode 1123: reward = 131.00, steps = 131\n",
      "22:54:37 [DEBUG] train episode 1124: reward = 159.00, steps = 159\n",
      "22:54:37 [DEBUG] train episode 1125: reward = 68.00, steps = 68\n",
      "22:54:37 [DEBUG] train episode 1126: reward = 47.00, steps = 47\n",
      "22:54:37 [DEBUG] train episode 1127: reward = 149.00, steps = 149\n",
      "22:54:37 [DEBUG] train episode 1128: reward = 200.00, steps = 200\n",
      "22:54:37 [DEBUG] train episode 1129: reward = 69.00, steps = 69\n",
      "22:54:38 [DEBUG] train episode 1130: reward = 100.00, steps = 100\n",
      "22:54:38 [DEBUG] train episode 1131: reward = 151.00, steps = 151\n",
      "22:54:38 [DEBUG] train episode 1132: reward = 47.00, steps = 47\n",
      "22:54:38 [DEBUG] train episode 1133: reward = 117.00, steps = 117\n",
      "22:54:38 [DEBUG] train episode 1134: reward = 140.00, steps = 140\n",
      "22:54:38 [DEBUG] train episode 1135: reward = 121.00, steps = 121\n",
      "22:54:38 [DEBUG] train episode 1136: reward = 133.00, steps = 133\n",
      "22:54:38 [DEBUG] train episode 1137: reward = 70.00, steps = 70\n",
      "22:54:38 [DEBUG] train episode 1138: reward = 108.00, steps = 108\n",
      "22:54:38 [DEBUG] train episode 1139: reward = 73.00, steps = 73\n",
      "22:54:38 [DEBUG] train episode 1140: reward = 105.00, steps = 105\n",
      "22:54:38 [DEBUG] train episode 1141: reward = 91.00, steps = 91\n",
      "22:54:38 [DEBUG] train episode 1142: reward = 95.00, steps = 95\n",
      "22:54:38 [DEBUG] train episode 1143: reward = 175.00, steps = 175\n",
      "22:54:38 [DEBUG] train episode 1144: reward = 200.00, steps = 200\n",
      "22:54:38 [DEBUG] train episode 1145: reward = 131.00, steps = 131\n",
      "22:54:38 [DEBUG] train episode 1146: reward = 120.00, steps = 120\n",
      "22:54:38 [DEBUG] train episode 1147: reward = 132.00, steps = 132\n",
      "22:54:39 [DEBUG] train episode 1148: reward = 85.00, steps = 85\n",
      "22:54:39 [DEBUG] train episode 1149: reward = 189.00, steps = 189\n",
      "22:54:39 [DEBUG] train episode 1150: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1151: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1152: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1153: reward = 180.00, steps = 180\n",
      "22:54:39 [DEBUG] train episode 1154: reward = 123.00, steps = 123\n",
      "22:54:39 [DEBUG] train episode 1155: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1156: reward = 141.00, steps = 141\n",
      "22:54:39 [DEBUG] train episode 1157: reward = 135.00, steps = 135\n",
      "22:54:39 [DEBUG] train episode 1158: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1159: reward = 152.00, steps = 152\n",
      "22:54:39 [DEBUG] train episode 1160: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1161: reward = 200.00, steps = 200\n",
      "22:54:39 [DEBUG] train episode 1162: reward = 109.00, steps = 109\n",
      "22:54:39 [DEBUG] train episode 1163: reward = 120.00, steps = 120\n",
      "22:54:40 [DEBUG] train episode 1164: reward = 129.00, steps = 129\n",
      "22:54:40 [DEBUG] train episode 1165: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1166: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1167: reward = 52.00, steps = 52\n",
      "22:54:40 [DEBUG] train episode 1168: reward = 179.00, steps = 179\n",
      "22:54:40 [DEBUG] train episode 1169: reward = 82.00, steps = 82\n",
      "22:54:40 [DEBUG] train episode 1170: reward = 148.00, steps = 148\n",
      "22:54:40 [DEBUG] train episode 1171: reward = 172.00, steps = 172\n",
      "22:54:40 [DEBUG] train episode 1172: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1173: reward = 195.00, steps = 195\n",
      "22:54:40 [DEBUG] train episode 1174: reward = 156.00, steps = 156\n",
      "22:54:40 [DEBUG] train episode 1175: reward = 143.00, steps = 143\n",
      "22:54:40 [DEBUG] train episode 1176: reward = 97.00, steps = 97\n",
      "22:54:40 [DEBUG] train episode 1177: reward = 170.00, steps = 170\n",
      "22:54:40 [DEBUG] train episode 1178: reward = 135.00, steps = 135\n",
      "22:54:40 [DEBUG] train episode 1179: reward = 89.00, steps = 89\n",
      "22:54:40 [DEBUG] train episode 1180: reward = 200.00, steps = 200\n",
      "22:54:40 [DEBUG] train episode 1181: reward = 44.00, steps = 44\n",
      "22:54:40 [DEBUG] train episode 1182: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1183: reward = 37.00, steps = 37\n",
      "22:54:41 [DEBUG] train episode 1184: reward = 145.00, steps = 145\n",
      "22:54:41 [DEBUG] train episode 1185: reward = 183.00, steps = 183\n",
      "22:54:41 [DEBUG] train episode 1186: reward = 105.00, steps = 105\n",
      "22:54:41 [DEBUG] train episode 1187: reward = 186.00, steps = 186\n",
      "22:54:41 [DEBUG] train episode 1188: reward = 181.00, steps = 181\n",
      "22:54:41 [DEBUG] train episode 1189: reward = 121.00, steps = 121\n",
      "22:54:41 [DEBUG] train episode 1190: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1191: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1192: reward = 108.00, steps = 108\n",
      "22:54:41 [DEBUG] train episode 1193: reward = 106.00, steps = 106\n",
      "22:54:41 [DEBUG] train episode 1194: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1195: reward = 134.00, steps = 134\n",
      "22:54:41 [DEBUG] train episode 1196: reward = 48.00, steps = 48\n",
      "22:54:41 [DEBUG] train episode 1197: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1198: reward = 200.00, steps = 200\n",
      "22:54:41 [DEBUG] train episode 1199: reward = 200.00, steps = 200\n",
      "22:54:42 [DEBUG] train episode 1200: reward = 138.00, steps = 138\n",
      "22:54:42 [DEBUG] train episode 1201: reward = 165.00, steps = 165\n",
      "22:54:42 [DEBUG] train episode 1202: reward = 167.00, steps = 167\n",
      "22:54:42 [DEBUG] train episode 1203: reward = 151.00, steps = 151\n",
      "22:54:42 [DEBUG] train episode 1204: reward = 81.00, steps = 81\n",
      "22:54:42 [DEBUG] train episode 1205: reward = 171.00, steps = 171\n",
      "22:54:42 [DEBUG] train episode 1206: reward = 183.00, steps = 183\n",
      "22:54:42 [DEBUG] train episode 1207: reward = 165.00, steps = 165\n",
      "22:54:42 [DEBUG] train episode 1208: reward = 162.00, steps = 162\n",
      "22:54:42 [DEBUG] train episode 1209: reward = 41.00, steps = 41\n",
      "22:54:42 [DEBUG] train episode 1210: reward = 198.00, steps = 198\n",
      "22:54:42 [DEBUG] train episode 1211: reward = 181.00, steps = 181\n",
      "22:54:42 [DEBUG] train episode 1212: reward = 171.00, steps = 171\n",
      "22:54:42 [DEBUG] train episode 1213: reward = 47.00, steps = 47\n",
      "22:54:42 [DEBUG] train episode 1214: reward = 200.00, steps = 200\n",
      "22:54:42 [DEBUG] train episode 1215: reward = 109.00, steps = 109\n",
      "22:54:43 [DEBUG] train episode 1216: reward = 155.00, steps = 155\n",
      "22:54:43 [DEBUG] train episode 1217: reward = 196.00, steps = 196\n",
      "22:54:43 [DEBUG] train episode 1218: reward = 186.00, steps = 186\n",
      "22:54:43 [DEBUG] train episode 1219: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1220: reward = 34.00, steps = 34\n",
      "22:54:43 [DEBUG] train episode 1221: reward = 144.00, steps = 144\n",
      "22:54:43 [DEBUG] train episode 1222: reward = 151.00, steps = 151\n",
      "22:54:43 [DEBUG] train episode 1223: reward = 194.00, steps = 194\n",
      "22:54:43 [DEBUG] train episode 1224: reward = 51.00, steps = 51\n",
      "22:54:43 [DEBUG] train episode 1225: reward = 102.00, steps = 102\n",
      "22:54:43 [DEBUG] train episode 1226: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1227: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1228: reward = 200.00, steps = 200\n",
      "22:54:43 [DEBUG] train episode 1229: reward = 129.00, steps = 129\n",
      "22:54:43 [DEBUG] train episode 1230: reward = 35.00, steps = 35\n",
      "22:54:44 [DEBUG] train episode 1231: reward = 111.00, steps = 111\n",
      "22:54:44 [DEBUG] train episode 1232: reward = 158.00, steps = 158\n",
      "22:54:44 [DEBUG] train episode 1233: reward = 74.00, steps = 74\n",
      "22:54:44 [DEBUG] train episode 1234: reward = 139.00, steps = 139\n",
      "22:54:44 [DEBUG] train episode 1235: reward = 180.00, steps = 180\n",
      "22:54:44 [DEBUG] train episode 1236: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1237: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1238: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1239: reward = 75.00, steps = 75\n",
      "22:54:44 [DEBUG] train episode 1240: reward = 200.00, steps = 200\n",
      "22:54:44 [DEBUG] train episode 1241: reward = 144.00, steps = 144\n",
      "22:54:44 [DEBUG] train episode 1242: reward = 117.00, steps = 117\n",
      "22:54:44 [DEBUG] train episode 1243: reward = 46.00, steps = 46\n",
      "22:54:44 [DEBUG] train episode 1244: reward = 107.00, steps = 107\n",
      "22:54:45 [DEBUG] train episode 1245: reward = 175.00, steps = 175\n",
      "22:54:45 [DEBUG] train episode 1246: reward = 100.00, steps = 100\n",
      "22:54:45 [DEBUG] train episode 1247: reward = 45.00, steps = 45\n",
      "22:54:45 [DEBUG] train episode 1248: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1249: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1250: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1251: reward = 169.00, steps = 169\n",
      "22:54:45 [DEBUG] train episode 1252: reward = 152.00, steps = 152\n",
      "22:54:45 [DEBUG] train episode 1253: reward = 200.00, steps = 200\n",
      "22:54:45 [DEBUG] train episode 1254: reward = 155.00, steps = 155\n",
      "22:54:45 [DEBUG] train episode 1255: reward = 199.00, steps = 199\n",
      "22:54:45 [DEBUG] train episode 1256: reward = 124.00, steps = 124\n",
      "22:54:45 [DEBUG] train episode 1257: reward = 178.00, steps = 178\n",
      "22:54:45 [DEBUG] train episode 1258: reward = 174.00, steps = 174\n",
      "22:54:45 [DEBUG] train episode 1259: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1260: reward = 196.00, steps = 196\n",
      "22:54:46 [DEBUG] train episode 1261: reward = 177.00, steps = 177\n",
      "22:54:46 [DEBUG] train episode 1262: reward = 120.00, steps = 120\n",
      "22:54:46 [DEBUG] train episode 1263: reward = 182.00, steps = 182\n",
      "22:54:46 [DEBUG] train episode 1264: reward = 198.00, steps = 198\n",
      "22:54:46 [DEBUG] train episode 1265: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1266: reward = 44.00, steps = 44\n",
      "22:54:46 [DEBUG] train episode 1267: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1268: reward = 151.00, steps = 151\n",
      "22:54:46 [DEBUG] train episode 1269: reward = 139.00, steps = 139\n",
      "22:54:46 [DEBUG] train episode 1270: reward = 200.00, steps = 200\n",
      "22:54:46 [DEBUG] train episode 1271: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1272: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1273: reward = 113.00, steps = 113\n",
      "22:54:47 [DEBUG] train episode 1274: reward = 156.00, steps = 156\n",
      "22:54:47 [DEBUG] train episode 1275: reward = 150.00, steps = 150\n",
      "22:54:47 [DEBUG] train episode 1276: reward = 159.00, steps = 159\n",
      "22:54:47 [DEBUG] train episode 1277: reward = 131.00, steps = 131\n",
      "22:54:47 [DEBUG] train episode 1278: reward = 110.00, steps = 110\n",
      "22:54:47 [DEBUG] train episode 1279: reward = 177.00, steps = 177\n",
      "22:54:47 [DEBUG] train episode 1280: reward = 191.00, steps = 191\n",
      "22:54:47 [DEBUG] train episode 1281: reward = 199.00, steps = 199\n",
      "22:54:47 [DEBUG] train episode 1282: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1283: reward = 174.00, steps = 174\n",
      "22:54:47 [DEBUG] train episode 1284: reward = 200.00, steps = 200\n",
      "22:54:47 [DEBUG] train episode 1285: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1286: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1287: reward = 28.00, steps = 28\n",
      "22:54:48 [DEBUG] train episode 1288: reward = 141.00, steps = 141\n",
      "22:54:48 [DEBUG] train episode 1289: reward = 135.00, steps = 135\n",
      "22:54:48 [DEBUG] train episode 1290: reward = 72.00, steps = 72\n",
      "22:54:48 [DEBUG] train episode 1291: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1292: reward = 153.00, steps = 153\n",
      "22:54:48 [DEBUG] train episode 1293: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1294: reward = 119.00, steps = 119\n",
      "22:54:48 [DEBUG] train episode 1295: reward = 184.00, steps = 184\n",
      "22:54:48 [DEBUG] train episode 1296: reward = 160.00, steps = 160\n",
      "22:54:48 [DEBUG] train episode 1297: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1298: reward = 82.00, steps = 82\n",
      "22:54:48 [DEBUG] train episode 1299: reward = 200.00, steps = 200\n",
      "22:54:48 [DEBUG] train episode 1300: reward = 129.00, steps = 129\n",
      "22:54:49 [DEBUG] train episode 1301: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1302: reward = 172.00, steps = 172\n",
      "22:54:49 [DEBUG] train episode 1303: reward = 31.00, steps = 31\n",
      "22:54:49 [DEBUG] train episode 1304: reward = 185.00, steps = 185\n",
      "22:54:49 [DEBUG] train episode 1305: reward = 56.00, steps = 56\n",
      "22:54:49 [DEBUG] train episode 1306: reward = 163.00, steps = 163\n",
      "22:54:49 [DEBUG] train episode 1307: reward = 156.00, steps = 156\n",
      "22:54:49 [DEBUG] train episode 1308: reward = 182.00, steps = 182\n",
      "22:54:49 [DEBUG] train episode 1309: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1310: reward = 196.00, steps = 196\n",
      "22:54:49 [DEBUG] train episode 1311: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1312: reward = 200.00, steps = 200\n",
      "22:54:49 [DEBUG] train episode 1313: reward = 182.00, steps = 182\n",
      "22:54:50 [DEBUG] train episode 1314: reward = 170.00, steps = 170\n",
      "22:54:50 [DEBUG] train episode 1315: reward = 39.00, steps = 39\n",
      "22:54:50 [DEBUG] train episode 1316: reward = 27.00, steps = 27\n",
      "22:54:50 [DEBUG] train episode 1317: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1318: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1319: reward = 114.00, steps = 114\n",
      "22:54:50 [DEBUG] train episode 1320: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1321: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1322: reward = 119.00, steps = 119\n",
      "22:54:50 [DEBUG] train episode 1323: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1324: reward = 75.00, steps = 75\n",
      "22:54:50 [DEBUG] train episode 1325: reward = 146.00, steps = 146\n",
      "22:54:50 [DEBUG] train episode 1326: reward = 91.00, steps = 91\n",
      "22:54:50 [DEBUG] train episode 1327: reward = 200.00, steps = 200\n",
      "22:54:50 [DEBUG] train episode 1328: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1329: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1330: reward = 141.00, steps = 141\n",
      "22:54:51 [DEBUG] train episode 1331: reward = 142.00, steps = 142\n",
      "22:54:51 [DEBUG] train episode 1332: reward = 173.00, steps = 173\n",
      "22:54:51 [DEBUG] train episode 1333: reward = 126.00, steps = 126\n",
      "22:54:51 [DEBUG] train episode 1334: reward = 86.00, steps = 86\n",
      "22:54:51 [DEBUG] train episode 1335: reward = 127.00, steps = 127\n",
      "22:54:51 [DEBUG] train episode 1336: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1337: reward = 68.00, steps = 68\n",
      "22:54:51 [DEBUG] train episode 1338: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1339: reward = 200.00, steps = 200\n",
      "22:54:51 [DEBUG] train episode 1340: reward = 147.00, steps = 147\n",
      "22:54:51 [DEBUG] train episode 1341: reward = 199.00, steps = 199\n",
      "22:54:51 [DEBUG] train episode 1342: reward = 139.00, steps = 139\n",
      "22:54:51 [DEBUG] train episode 1343: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1344: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1345: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1346: reward = 135.00, steps = 135\n",
      "22:54:52 [DEBUG] train episode 1347: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1348: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1349: reward = 77.00, steps = 77\n",
      "22:54:52 [DEBUG] train episode 1350: reward = 161.00, steps = 161\n",
      "22:54:52 [DEBUG] train episode 1351: reward = 158.00, steps = 158\n",
      "22:54:52 [DEBUG] train episode 1352: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1353: reward = 153.00, steps = 153\n",
      "22:54:52 [DEBUG] train episode 1354: reward = 44.00, steps = 44\n",
      "22:54:52 [DEBUG] train episode 1355: reward = 156.00, steps = 156\n",
      "22:54:52 [DEBUG] train episode 1356: reward = 200.00, steps = 200\n",
      "22:54:52 [DEBUG] train episode 1357: reward = 189.00, steps = 189\n",
      "22:54:53 [DEBUG] train episode 1358: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1359: reward = 87.00, steps = 87\n",
      "22:54:53 [DEBUG] train episode 1360: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1361: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1362: reward = 190.00, steps = 190\n",
      "22:54:53 [DEBUG] train episode 1363: reward = 161.00, steps = 161\n",
      "22:54:53 [DEBUG] train episode 1364: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1365: reward = 60.00, steps = 60\n",
      "22:54:53 [DEBUG] train episode 1366: reward = 138.00, steps = 138\n",
      "22:54:53 [DEBUG] train episode 1367: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1368: reward = 127.00, steps = 127\n",
      "22:54:53 [DEBUG] train episode 1369: reward = 200.00, steps = 200\n",
      "22:54:53 [DEBUG] train episode 1370: reward = 117.00, steps = 117\n",
      "22:54:53 [DEBUG] train episode 1371: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1372: reward = 185.00, steps = 185\n",
      "22:54:54 [DEBUG] train episode 1373: reward = 45.00, steps = 45\n",
      "22:54:54 [DEBUG] train episode 1374: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1375: reward = 171.00, steps = 171\n",
      "22:54:54 [DEBUG] train episode 1376: reward = 175.00, steps = 175\n",
      "22:54:54 [DEBUG] train episode 1377: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1378: reward = 134.00, steps = 134\n",
      "22:54:54 [DEBUG] train episode 1379: reward = 190.00, steps = 190\n",
      "22:54:54 [DEBUG] train episode 1380: reward = 172.00, steps = 172\n",
      "22:54:54 [DEBUG] train episode 1381: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1382: reward = 103.00, steps = 103\n",
      "22:54:54 [DEBUG] train episode 1383: reward = 200.00, steps = 200\n",
      "22:54:54 [DEBUG] train episode 1384: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1385: reward = 198.00, steps = 198\n",
      "22:54:55 [DEBUG] train episode 1386: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1387: reward = 121.00, steps = 121\n",
      "22:54:55 [DEBUG] train episode 1388: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1389: reward = 183.00, steps = 183\n",
      "22:54:55 [DEBUG] train episode 1390: reward = 124.00, steps = 124\n",
      "22:54:55 [DEBUG] train episode 1391: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1392: reward = 195.00, steps = 195\n",
      "22:54:55 [DEBUG] train episode 1393: reward = 199.00, steps = 199\n",
      "22:54:55 [DEBUG] train episode 1394: reward = 158.00, steps = 158\n",
      "22:54:55 [DEBUG] train episode 1395: reward = 200.00, steps = 200\n",
      "22:54:55 [DEBUG] train episode 1396: reward = 132.00, steps = 132\n",
      "22:54:55 [DEBUG] train episode 1397: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1398: reward = 165.00, steps = 165\n",
      "22:54:56 [DEBUG] train episode 1399: reward = 133.00, steps = 133\n",
      "22:54:56 [DEBUG] train episode 1400: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1401: reward = 36.00, steps = 36\n",
      "22:54:56 [DEBUG] train episode 1402: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1403: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1404: reward = 111.00, steps = 111\n",
      "22:54:56 [DEBUG] train episode 1405: reward = 200.00, steps = 200\n",
      "22:54:56 [DEBUG] train episode 1406: reward = 133.00, steps = 133\n",
      "22:54:56 [DEBUG] train episode 1407: reward = 165.00, steps = 165\n",
      "22:54:56 [DEBUG] train episode 1408: reward = 195.00, steps = 195\n",
      "22:54:56 [DEBUG] train episode 1409: reward = 137.00, steps = 137\n",
      "22:54:56 [DEBUG] train episode 1410: reward = 169.00, steps = 169\n",
      "22:54:57 [DEBUG] train episode 1411: reward = 156.00, steps = 156\n",
      "22:54:57 [DEBUG] train episode 1412: reward = 29.00, steps = 29\n",
      "22:54:57 [DEBUG] train episode 1413: reward = 183.00, steps = 183\n",
      "22:54:57 [DEBUG] train episode 1414: reward = 166.00, steps = 166\n",
      "22:54:57 [DEBUG] train episode 1415: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1416: reward = 186.00, steps = 186\n",
      "22:54:57 [DEBUG] train episode 1417: reward = 165.00, steps = 165\n",
      "22:54:57 [DEBUG] train episode 1418: reward = 177.00, steps = 177\n",
      "22:54:57 [DEBUG] train episode 1419: reward = 164.00, steps = 164\n",
      "22:54:57 [DEBUG] train episode 1420: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1421: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1422: reward = 162.00, steps = 162\n",
      "22:54:57 [DEBUG] train episode 1423: reward = 200.00, steps = 200\n",
      "22:54:57 [DEBUG] train episode 1424: reward = 87.00, steps = 87\n",
      "22:54:58 [DEBUG] train episode 1425: reward = 174.00, steps = 174\n",
      "22:54:58 [DEBUG] train episode 1426: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1427: reward = 126.00, steps = 126\n",
      "22:54:58 [DEBUG] train episode 1428: reward = 179.00, steps = 179\n",
      "22:54:58 [DEBUG] train episode 1429: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1430: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1431: reward = 200.00, steps = 200\n",
      "22:54:58 [DEBUG] train episode 1432: reward = 41.00, steps = 41\n",
      "22:54:58 [DEBUG] train episode 1433: reward = 185.00, steps = 185\n",
      "22:54:58 [DEBUG] train episode 1434: reward = 122.00, steps = 122\n",
      "22:54:58 [DEBUG] train episode 1435: reward = 144.00, steps = 144\n",
      "22:54:58 [DEBUG] train episode 1436: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1437: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1438: reward = 65.00, steps = 65\n",
      "22:54:59 [DEBUG] train episode 1439: reward = 152.00, steps = 152\n",
      "22:54:59 [DEBUG] train episode 1440: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1441: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1442: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1443: reward = 153.00, steps = 153\n",
      "22:54:59 [DEBUG] train episode 1444: reward = 34.00, steps = 34\n",
      "22:54:59 [DEBUG] train episode 1445: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1446: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1447: reward = 173.00, steps = 173\n",
      "22:54:59 [DEBUG] train episode 1448: reward = 200.00, steps = 200\n",
      "22:54:59 [DEBUG] train episode 1449: reward = 127.00, steps = 127\n",
      "22:54:59 [DEBUG] train episode 1450: reward = 194.00, steps = 194\n",
      "22:55:00 [DEBUG] train episode 1451: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1452: reward = 50.00, steps = 50\n",
      "22:55:00 [DEBUG] train episode 1453: reward = 76.00, steps = 76\n",
      "22:55:00 [DEBUG] train episode 1454: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1455: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1456: reward = 168.00, steps = 168\n",
      "22:55:00 [DEBUG] train episode 1457: reward = 125.00, steps = 125\n",
      "22:55:00 [DEBUG] train episode 1458: reward = 178.00, steps = 178\n",
      "22:55:00 [DEBUG] train episode 1459: reward = 200.00, steps = 200\n",
      "22:55:00 [DEBUG] train episode 1460: reward = 72.00, steps = 72\n",
      "22:55:00 [DEBUG] train episode 1461: reward = 82.00, steps = 82\n",
      "22:55:00 [DEBUG] train episode 1462: reward = 137.00, steps = 137\n",
      "22:55:01 [DEBUG] train episode 1463: reward = 158.00, steps = 158\n",
      "22:55:01 [DEBUG] train episode 1464: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1465: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1466: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1467: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1468: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1469: reward = 148.00, steps = 148\n",
      "22:55:01 [DEBUG] train episode 1470: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1471: reward = 156.00, steps = 156\n",
      "22:55:01 [DEBUG] train episode 1472: reward = 200.00, steps = 200\n",
      "22:55:01 [DEBUG] train episode 1473: reward = 51.00, steps = 51\n",
      "22:55:01 [DEBUG] train episode 1474: reward = 173.00, steps = 173\n",
      "22:55:02 [DEBUG] train episode 1475: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1476: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1477: reward = 185.00, steps = 185\n",
      "22:55:02 [DEBUG] train episode 1478: reward = 90.00, steps = 90\n",
      "22:55:02 [DEBUG] train episode 1479: reward = 137.00, steps = 137\n",
      "22:55:02 [DEBUG] train episode 1480: reward = 130.00, steps = 130\n",
      "22:55:02 [DEBUG] train episode 1481: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1482: reward = 111.00, steps = 111\n",
      "22:55:02 [DEBUG] train episode 1483: reward = 200.00, steps = 200\n",
      "22:55:02 [DEBUG] train episode 1484: reward = 111.00, steps = 111\n",
      "22:55:02 [DEBUG] train episode 1485: reward = 185.00, steps = 185\n",
      "22:55:02 [DEBUG] train episode 1486: reward = 113.00, steps = 113\n",
      "22:55:02 [DEBUG] train episode 1487: reward = 131.00, steps = 131\n",
      "22:55:02 [DEBUG] train episode 1488: reward = 176.00, steps = 176\n",
      "22:55:02 [DEBUG] train episode 1489: reward = 171.00, steps = 171\n",
      "22:55:03 [DEBUG] train episode 1490: reward = 185.00, steps = 185\n",
      "22:55:03 [DEBUG] train episode 1491: reward = 156.00, steps = 156\n",
      "22:55:03 [DEBUG] train episode 1492: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1493: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1494: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1495: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1496: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1497: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1498: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1499: reward = 200.00, steps = 200\n",
      "22:55:03 [DEBUG] train episode 1500: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1501: reward = 159.00, steps = 159\n",
      "22:55:04 [DEBUG] train episode 1502: reward = 174.00, steps = 174\n",
      "22:55:04 [DEBUG] train episode 1503: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1504: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1505: reward = 154.00, steps = 154\n",
      "22:55:04 [DEBUG] train episode 1506: reward = 123.00, steps = 123\n",
      "22:55:04 [DEBUG] train episode 1507: reward = 191.00, steps = 191\n",
      "22:55:04 [DEBUG] train episode 1508: reward = 158.00, steps = 158\n",
      "22:55:04 [DEBUG] train episode 1509: reward = 128.00, steps = 128\n",
      "22:55:04 [DEBUG] train episode 1510: reward = 200.00, steps = 200\n",
      "22:55:04 [DEBUG] train episode 1511: reward = 88.00, steps = 88\n",
      "22:55:04 [DEBUG] train episode 1512: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1513: reward = 187.00, steps = 187\n",
      "22:55:05 [DEBUG] train episode 1514: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1515: reward = 170.00, steps = 170\n",
      "22:55:05 [DEBUG] train episode 1516: reward = 194.00, steps = 194\n",
      "22:55:05 [DEBUG] train episode 1517: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1518: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1519: reward = 176.00, steps = 176\n",
      "22:55:05 [DEBUG] train episode 1520: reward = 142.00, steps = 142\n",
      "22:55:05 [DEBUG] train episode 1521: reward = 200.00, steps = 200\n",
      "22:55:05 [DEBUG] train episode 1522: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1523: reward = 158.00, steps = 158\n",
      "22:55:06 [DEBUG] train episode 1524: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1525: reward = 103.00, steps = 103\n",
      "22:55:06 [DEBUG] train episode 1526: reward = 186.00, steps = 186\n",
      "22:55:06 [DEBUG] train episode 1527: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1528: reward = 144.00, steps = 144\n",
      "22:55:06 [DEBUG] train episode 1529: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1530: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1531: reward = 200.00, steps = 200\n",
      "22:55:06 [DEBUG] train episode 1532: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1533: reward = 197.00, steps = 197\n",
      "22:55:07 [DEBUG] train episode 1534: reward = 132.00, steps = 132\n",
      "22:55:07 [DEBUG] train episode 1535: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1536: reward = 113.00, steps = 113\n",
      "22:55:07 [DEBUG] train episode 1537: reward = 143.00, steps = 143\n",
      "22:55:07 [DEBUG] train episode 1538: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1539: reward = 124.00, steps = 124\n",
      "22:55:07 [DEBUG] train episode 1540: reward = 36.00, steps = 36\n",
      "22:55:07 [DEBUG] train episode 1541: reward = 149.00, steps = 149\n",
      "22:55:07 [DEBUG] train episode 1542: reward = 147.00, steps = 147\n",
      "22:55:07 [DEBUG] train episode 1543: reward = 200.00, steps = 200\n",
      "22:55:07 [DEBUG] train episode 1544: reward = 74.00, steps = 74\n",
      "22:55:08 [DEBUG] train episode 1545: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1546: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1547: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1548: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1549: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1550: reward = 164.00, steps = 164\n",
      "22:55:08 [DEBUG] train episode 1551: reward = 200.00, steps = 200\n",
      "22:55:08 [DEBUG] train episode 1552: reward = 48.00, steps = 48\n",
      "22:55:08 [DEBUG] train episode 1553: reward = 120.00, steps = 120\n",
      "22:55:08 [DEBUG] train episode 1554: reward = 172.00, steps = 172\n",
      "22:55:08 [DEBUG] train episode 1555: reward = 170.00, steps = 170\n",
      "22:55:08 [DEBUG] train episode 1556: reward = 151.00, steps = 151\n",
      "22:55:09 [DEBUG] train episode 1557: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1558: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1559: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1560: reward = 154.00, steps = 154\n",
      "22:55:09 [DEBUG] train episode 1561: reward = 174.00, steps = 174\n",
      "22:55:09 [DEBUG] train episode 1562: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1563: reward = 169.00, steps = 169\n",
      "22:55:09 [DEBUG] train episode 1564: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1565: reward = 200.00, steps = 200\n",
      "22:55:09 [DEBUG] train episode 1566: reward = 159.00, steps = 159\n",
      "22:55:09 [DEBUG] train episode 1567: reward = 163.00, steps = 163\n",
      "22:55:10 [DEBUG] train episode 1568: reward = 144.00, steps = 144\n",
      "22:55:10 [DEBUG] train episode 1569: reward = 53.00, steps = 53\n",
      "22:55:10 [DEBUG] train episode 1570: reward = 181.00, steps = 181\n",
      "22:55:10 [DEBUG] train episode 1571: reward = 36.00, steps = 36\n",
      "22:55:10 [DEBUG] train episode 1572: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1573: reward = 183.00, steps = 183\n",
      "22:55:10 [DEBUG] train episode 1574: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1575: reward = 147.00, steps = 147\n",
      "22:55:10 [DEBUG] train episode 1576: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1577: reward = 200.00, steps = 200\n",
      "22:55:10 [DEBUG] train episode 1578: reward = 160.00, steps = 160\n",
      "22:55:10 [DEBUG] train episode 1579: reward = 94.00, steps = 94\n",
      "22:55:10 [DEBUG] train episode 1580: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1581: reward = 183.00, steps = 183\n",
      "22:55:11 [DEBUG] train episode 1582: reward = 191.00, steps = 191\n",
      "22:55:11 [DEBUG] train episode 1583: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1584: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1585: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1586: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1587: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1588: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1589: reward = 104.00, steps = 104\n",
      "22:55:11 [DEBUG] train episode 1590: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1591: reward = 200.00, steps = 200\n",
      "22:55:11 [DEBUG] train episode 1592: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1593: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1594: reward = 37.00, steps = 37\n",
      "22:55:12 [DEBUG] train episode 1595: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1596: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1597: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1598: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1599: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1600: reward = 200.00, steps = 200\n",
      "22:55:12 [DEBUG] train episode 1601: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1602: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1603: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1604: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1605: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1606: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1607: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1608: reward = 147.00, steps = 147\n",
      "22:55:13 [DEBUG] train episode 1609: reward = 179.00, steps = 179\n",
      "22:55:13 [DEBUG] train episode 1610: reward = 200.00, steps = 200\n",
      "22:55:13 [DEBUG] train episode 1611: reward = 167.00, steps = 167\n",
      "22:55:13 [DEBUG] train episode 1612: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1613: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1614: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1615: reward = 109.00, steps = 109\n",
      "22:55:14 [DEBUG] train episode 1616: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1617: reward = 159.00, steps = 159\n",
      "22:55:14 [DEBUG] train episode 1618: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1619: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1620: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1621: reward = 200.00, steps = 200\n",
      "22:55:14 [DEBUG] train episode 1622: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1623: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1624: reward = 173.00, steps = 173\n",
      "22:55:15 [DEBUG] train episode 1625: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1626: reward = 120.00, steps = 120\n",
      "22:55:15 [DEBUG] train episode 1627: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1628: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1629: reward = 144.00, steps = 144\n",
      "22:55:15 [DEBUG] train episode 1630: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1631: reward = 200.00, steps = 200\n",
      "22:55:15 [DEBUG] train episode 1632: reward = 163.00, steps = 163\n",
      "22:55:15 [DEBUG] train episode 1633: reward = 182.00, steps = 182\n",
      "22:55:16 [DEBUG] train episode 1634: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1635: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1636: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1637: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1638: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1639: reward = 155.00, steps = 155\n",
      "22:55:16 [DEBUG] train episode 1640: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1641: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1642: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1643: reward = 200.00, steps = 200\n",
      "22:55:16 [DEBUG] train episode 1644: reward = 175.00, steps = 175\n",
      "22:55:16 [DEBUG] train episode 1645: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1646: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1647: reward = 135.00, steps = 135\n",
      "22:55:17 [DEBUG] train episode 1648: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1649: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1650: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1651: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1652: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1653: reward = 200.00, steps = 200\n",
      "22:55:17 [DEBUG] train episode 1654: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1655: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1656: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1657: reward = 33.00, steps = 33\n",
      "22:55:18 [DEBUG] train episode 1658: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1659: reward = 189.00, steps = 189\n",
      "22:55:18 [DEBUG] train episode 1660: reward = 96.00, steps = 96\n",
      "22:55:18 [DEBUG] train episode 1661: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1662: reward = 152.00, steps = 152\n",
      "22:55:18 [DEBUG] train episode 1663: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1664: reward = 200.00, steps = 200\n",
      "22:55:18 [DEBUG] train episode 1665: reward = 175.00, steps = 175\n",
      "22:55:19 [DEBUG] train episode 1666: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1667: reward = 168.00, steps = 168\n",
      "22:55:19 [DEBUG] train episode 1668: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1669: reward = 194.00, steps = 194\n",
      "22:55:19 [DEBUG] train episode 1670: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1671: reward = 159.00, steps = 159\n",
      "22:55:19 [DEBUG] train episode 1672: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1673: reward = 200.00, steps = 200\n",
      "22:55:19 [DEBUG] train episode 1674: reward = 135.00, steps = 135\n",
      "22:55:19 [DEBUG] train episode 1675: reward = 107.00, steps = 107\n",
      "22:55:20 [DEBUG] train episode 1676: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1677: reward = 31.00, steps = 31\n",
      "22:55:20 [DEBUG] train episode 1678: reward = 53.00, steps = 53\n",
      "22:55:20 [DEBUG] train episode 1679: reward = 165.00, steps = 165\n",
      "22:55:20 [DEBUG] train episode 1680: reward = 18.00, steps = 18\n",
      "22:55:20 [DEBUG] train episode 1681: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1682: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1683: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1684: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1685: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1686: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1687: reward = 200.00, steps = 200\n",
      "22:55:20 [DEBUG] train episode 1688: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1689: reward = 165.00, steps = 165\n",
      "22:55:21 [DEBUG] train episode 1690: reward = 108.00, steps = 108\n",
      "22:55:21 [DEBUG] train episode 1691: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1692: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1693: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1694: reward = 44.00, steps = 44\n",
      "22:55:21 [DEBUG] train episode 1695: reward = 70.00, steps = 70\n",
      "22:55:21 [DEBUG] train episode 1696: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1697: reward = 79.00, steps = 79\n",
      "22:55:21 [DEBUG] train episode 1698: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1699: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1700: reward = 128.00, steps = 128\n",
      "22:55:21 [DEBUG] train episode 1701: reward = 200.00, steps = 200\n",
      "22:55:21 [DEBUG] train episode 1702: reward = 131.00, steps = 131\n",
      "22:55:21 [DEBUG] train episode 1703: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1704: reward = 142.00, steps = 142\n",
      "22:55:22 [DEBUG] train episode 1705: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1706: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1707: reward = 68.00, steps = 68\n",
      "22:55:22 [DEBUG] train episode 1708: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1709: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1710: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1711: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1712: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1713: reward = 200.00, steps = 200\n",
      "22:55:22 [DEBUG] train episode 1714: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1715: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1716: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1717: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1718: reward = 61.00, steps = 61\n",
      "22:55:23 [DEBUG] train episode 1719: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1720: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1721: reward = 154.00, steps = 154\n",
      "22:55:23 [DEBUG] train episode 1722: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1723: reward = 200.00, steps = 200\n",
      "22:55:23 [DEBUG] train episode 1724: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1725: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1726: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1727: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1728: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1729: reward = 80.00, steps = 80\n",
      "22:55:24 [DEBUG] train episode 1730: reward = 182.00, steps = 182\n",
      "22:55:24 [DEBUG] train episode 1731: reward = 136.00, steps = 136\n",
      "22:55:24 [DEBUG] train episode 1732: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1733: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1734: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1735: reward = 200.00, steps = 200\n",
      "22:55:24 [DEBUG] train episode 1736: reward = 82.00, steps = 82\n",
      "22:55:25 [DEBUG] train episode 1737: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1738: reward = 177.00, steps = 177\n",
      "22:55:25 [DEBUG] train episode 1739: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1740: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1741: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1742: reward = 193.00, steps = 193\n",
      "22:55:25 [DEBUG] train episode 1743: reward = 200.00, steps = 200\n",
      "22:55:25 [DEBUG] train episode 1744: reward = 76.00, steps = 76\n",
      "22:55:25 [DEBUG] train episode 1745: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1746: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1747: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1748: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1749: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1750: reward = 154.00, steps = 154\n",
      "22:55:26 [DEBUG] train episode 1751: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1752: reward = 199.00, steps = 199\n",
      "22:55:26 [DEBUG] train episode 1753: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1754: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1755: reward = 200.00, steps = 200\n",
      "22:55:26 [DEBUG] train episode 1756: reward = 71.00, steps = 71\n",
      "22:55:27 [DEBUG] train episode 1757: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1758: reward = 135.00, steps = 135\n",
      "22:55:27 [DEBUG] train episode 1759: reward = 84.00, steps = 84\n",
      "22:55:27 [DEBUG] train episode 1760: reward = 195.00, steps = 195\n",
      "22:55:27 [DEBUG] train episode 1761: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1762: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1763: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1764: reward = 173.00, steps = 173\n",
      "22:55:27 [DEBUG] train episode 1765: reward = 200.00, steps = 200\n",
      "22:55:27 [DEBUG] train episode 1766: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1767: reward = 37.00, steps = 37\n",
      "22:55:28 [DEBUG] train episode 1768: reward = 37.00, steps = 37\n",
      "22:55:28 [DEBUG] train episode 1769: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1770: reward = 29.00, steps = 29\n",
      "22:55:28 [DEBUG] train episode 1771: reward = 145.00, steps = 145\n",
      "22:55:28 [DEBUG] train episode 1772: reward = 118.00, steps = 118\n",
      "22:55:28 [DEBUG] train episode 1773: reward = 86.00, steps = 86\n",
      "22:55:28 [DEBUG] train episode 1774: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1775: reward = 193.00, steps = 193\n",
      "22:55:28 [DEBUG] train episode 1776: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1777: reward = 109.00, steps = 109\n",
      "22:55:28 [DEBUG] train episode 1778: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1779: reward = 146.00, steps = 146\n",
      "22:55:28 [DEBUG] train episode 1780: reward = 200.00, steps = 200\n",
      "22:55:28 [DEBUG] train episode 1781: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1782: reward = 175.00, steps = 175\n",
      "22:55:29 [DEBUG] train episode 1783: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1784: reward = 147.00, steps = 147\n",
      "22:55:29 [DEBUG] train episode 1785: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1786: reward = 181.00, steps = 181\n",
      "22:55:29 [DEBUG] train episode 1787: reward = 162.00, steps = 162\n",
      "22:55:29 [DEBUG] train episode 1788: reward = 52.00, steps = 52\n",
      "22:55:29 [DEBUG] train episode 1789: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1790: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1791: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1792: reward = 200.00, steps = 200\n",
      "22:55:29 [DEBUG] train episode 1793: reward = 21.00, steps = 21\n",
      "22:55:30 [DEBUG] train episode 1794: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1795: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1796: reward = 133.00, steps = 133\n",
      "22:55:30 [DEBUG] train episode 1797: reward = 140.00, steps = 140\n",
      "22:55:30 [DEBUG] train episode 1798: reward = 198.00, steps = 198\n",
      "22:55:30 [DEBUG] train episode 1799: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1800: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1801: reward = 200.00, steps = 200\n",
      "22:55:30 [DEBUG] train episode 1802: reward = 175.00, steps = 175\n",
      "22:55:30 [DEBUG] train episode 1803: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1804: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1805: reward = 58.00, steps = 58\n",
      "22:55:31 [DEBUG] train episode 1806: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1807: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1808: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1809: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1810: reward = 200.00, steps = 200\n",
      "22:55:31 [DEBUG] train episode 1811: reward = 159.00, steps = 159\n",
      "22:55:31 [DEBUG] train episode 1812: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1813: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1814: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1815: reward = 161.00, steps = 161\n",
      "22:55:32 [DEBUG] train episode 1816: reward = 101.00, steps = 101\n",
      "22:55:32 [DEBUG] train episode 1817: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1818: reward = 137.00, steps = 137\n",
      "22:55:32 [DEBUG] train episode 1819: reward = 103.00, steps = 103\n",
      "22:55:32 [DEBUG] train episode 1820: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1821: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1822: reward = 63.00, steps = 63\n",
      "22:55:32 [DEBUG] train episode 1823: reward = 200.00, steps = 200\n",
      "22:55:32 [DEBUG] train episode 1824: reward = 129.00, steps = 129\n",
      "22:55:33 [DEBUG] train episode 1825: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1826: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1827: reward = 36.00, steps = 36\n",
      "22:55:33 [DEBUG] train episode 1828: reward = 185.00, steps = 185\n",
      "22:55:33 [DEBUG] train episode 1829: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1830: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1831: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1832: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1833: reward = 178.00, steps = 178\n",
      "22:55:33 [DEBUG] train episode 1834: reward = 200.00, steps = 200\n",
      "22:55:33 [DEBUG] train episode 1835: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1836: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1837: reward = 153.00, steps = 153\n",
      "22:55:34 [DEBUG] train episode 1838: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1839: reward = 180.00, steps = 180\n",
      "22:55:34 [DEBUG] train episode 1840: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1841: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1842: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1843: reward = 66.00, steps = 66\n",
      "22:55:34 [DEBUG] train episode 1844: reward = 18.00, steps = 18\n",
      "22:55:34 [DEBUG] train episode 1845: reward = 200.00, steps = 200\n",
      "22:55:34 [DEBUG] train episode 1846: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1847: reward = 187.00, steps = 187\n",
      "22:55:35 [DEBUG] train episode 1848: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1849: reward = 100.00, steps = 100\n",
      "22:55:35 [DEBUG] train episode 1850: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1851: reward = 30.00, steps = 30\n",
      "22:55:35 [DEBUG] train episode 1852: reward = 106.00, steps = 106\n",
      "22:55:35 [DEBUG] train episode 1853: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1854: reward = 162.00, steps = 162\n",
      "22:55:35 [DEBUG] train episode 1855: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1856: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1857: reward = 200.00, steps = 200\n",
      "22:55:35 [DEBUG] train episode 1858: reward = 23.00, steps = 23\n",
      "22:55:36 [DEBUG] train episode 1859: reward = 175.00, steps = 175\n",
      "22:55:36 [DEBUG] train episode 1860: reward = 27.00, steps = 27\n",
      "22:55:36 [DEBUG] train episode 1861: reward = 156.00, steps = 156\n",
      "22:55:36 [DEBUG] train episode 1862: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1863: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1864: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1865: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1866: reward = 155.00, steps = 155\n",
      "22:55:36 [DEBUG] train episode 1867: reward = 175.00, steps = 175\n",
      "22:55:36 [DEBUG] train episode 1868: reward = 200.00, steps = 200\n",
      "22:55:36 [DEBUG] train episode 1869: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1870: reward = 176.00, steps = 176\n",
      "22:55:37 [DEBUG] train episode 1871: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1872: reward = 199.00, steps = 199\n",
      "22:55:37 [DEBUG] train episode 1873: reward = 179.00, steps = 179\n",
      "22:55:37 [DEBUG] train episode 1874: reward = 170.00, steps = 170\n",
      "22:55:37 [DEBUG] train episode 1875: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1876: reward = 200.00, steps = 200\n",
      "22:55:37 [DEBUG] train episode 1877: reward = 116.00, steps = 116\n",
      "22:55:37 [DEBUG] train episode 1878: reward = 138.00, steps = 138\n",
      "22:55:37 [DEBUG] train episode 1879: reward = 19.00, steps = 19\n",
      "22:55:37 [DEBUG] train episode 1880: reward = 152.00, steps = 152\n",
      "22:55:37 [DEBUG] train episode 1881: reward = 157.00, steps = 157\n",
      "22:55:37 [DEBUG] train episode 1882: reward = 141.00, steps = 141\n",
      "22:55:38 [DEBUG] train episode 1883: reward = 133.00, steps = 133\n",
      "22:55:38 [DEBUG] train episode 1884: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1885: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1886: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1887: reward = 171.00, steps = 171\n",
      "22:55:38 [DEBUG] train episode 1888: reward = 121.00, steps = 121\n",
      "22:55:38 [DEBUG] train episode 1889: reward = 191.00, steps = 191\n",
      "22:55:38 [DEBUG] train episode 1890: reward = 200.00, steps = 200\n",
      "22:55:38 [DEBUG] train episode 1891: reward = 172.00, steps = 172\n",
      "22:55:38 [DEBUG] train episode 1892: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1893: reward = 136.00, steps = 136\n",
      "22:55:39 [DEBUG] train episode 1894: reward = 166.00, steps = 166\n",
      "22:55:39 [DEBUG] train episode 1895: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1896: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1897: reward = 113.00, steps = 113\n",
      "22:55:39 [DEBUG] train episode 1898: reward = 162.00, steps = 162\n",
      "22:55:39 [DEBUG] train episode 1899: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1900: reward = 159.00, steps = 159\n",
      "22:55:39 [DEBUG] train episode 1901: reward = 200.00, steps = 200\n",
      "22:55:39 [DEBUG] train episode 1902: reward = 35.00, steps = 35\n",
      "22:55:39 [DEBUG] train episode 1903: reward = 141.00, steps = 141\n",
      "22:55:39 [DEBUG] train episode 1904: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1905: reward = 185.00, steps = 185\n",
      "22:55:40 [DEBUG] train episode 1906: reward = 29.00, steps = 29\n",
      "22:55:40 [DEBUG] train episode 1907: reward = 136.00, steps = 136\n",
      "22:55:40 [DEBUG] train episode 1908: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1909: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1910: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1911: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1912: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1913: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1914: reward = 200.00, steps = 200\n",
      "22:55:40 [DEBUG] train episode 1915: reward = 140.00, steps = 140\n",
      "22:55:41 [DEBUG] train episode 1916: reward = 155.00, steps = 155\n",
      "22:55:41 [DEBUG] train episode 1917: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1918: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1919: reward = 74.00, steps = 74\n",
      "22:55:41 [DEBUG] train episode 1920: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1921: reward = 84.00, steps = 84\n",
      "22:55:41 [DEBUG] train episode 1922: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1923: reward = 160.00, steps = 160\n",
      "22:55:41 [DEBUG] train episode 1924: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1925: reward = 200.00, steps = 200\n",
      "22:55:41 [DEBUG] train episode 1926: reward = 101.00, steps = 101\n",
      "22:55:42 [DEBUG] train episode 1927: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1928: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1929: reward = 188.00, steps = 188\n",
      "22:55:42 [DEBUG] train episode 1930: reward = 155.00, steps = 155\n",
      "22:55:42 [DEBUG] train episode 1931: reward = 178.00, steps = 178\n",
      "22:55:42 [DEBUG] train episode 1932: reward = 32.00, steps = 32\n",
      "22:55:42 [DEBUG] train episode 1933: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1934: reward = 118.00, steps = 118\n",
      "22:55:42 [DEBUG] train episode 1935: reward = 177.00, steps = 177\n",
      "22:55:42 [DEBUG] train episode 1936: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1937: reward = 200.00, steps = 200\n",
      "22:55:42 [DEBUG] train episode 1938: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1939: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1940: reward = 154.00, steps = 154\n",
      "22:55:43 [DEBUG] train episode 1941: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1942: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1943: reward = 160.00, steps = 160\n",
      "22:55:43 [DEBUG] train episode 1944: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1945: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1946: reward = 197.00, steps = 197\n",
      "22:55:43 [DEBUG] train episode 1947: reward = 120.00, steps = 120\n",
      "22:55:43 [DEBUG] train episode 1948: reward = 200.00, steps = 200\n",
      "22:55:43 [DEBUG] train episode 1949: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1950: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1951: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1952: reward = 132.00, steps = 132\n",
      "22:55:44 [DEBUG] train episode 1953: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1954: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1955: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1956: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1957: reward = 200.00, steps = 200\n",
      "22:55:44 [DEBUG] train episode 1958: reward = 142.00, steps = 142\n",
      "22:55:44 [DEBUG] train episode 1959: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1960: reward = 174.00, steps = 174\n",
      "22:55:45 [DEBUG] train episode 1961: reward = 34.00, steps = 34\n",
      "22:55:45 [DEBUG] train episode 1962: reward = 101.00, steps = 101\n",
      "22:55:45 [DEBUG] train episode 1963: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1964: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1965: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1966: reward = 163.00, steps = 163\n",
      "22:55:45 [DEBUG] train episode 1967: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1968: reward = 200.00, steps = 200\n",
      "22:55:45 [DEBUG] train episode 1969: reward = 174.00, steps = 174\n",
      "22:55:45 [DEBUG] train episode 1970: reward = 167.00, steps = 167\n",
      "22:55:45 [DEBUG] train episode 1971: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1972: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1973: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1974: reward = 190.00, steps = 190\n",
      "22:55:46 [DEBUG] train episode 1975: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1976: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1977: reward = 200.00, steps = 200\n",
      "22:55:46 [DEBUG] train episode 1978: reward = 113.00, steps = 113\n",
      "22:55:46 [DEBUG] train episode 1979: reward = 190.00, steps = 190\n",
      "22:55:46 [DEBUG] train episode 1980: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1981: reward = 186.00, steps = 186\n",
      "22:55:47 [DEBUG] train episode 1982: reward = 179.00, steps = 179\n",
      "22:55:47 [DEBUG] train episode 1983: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1984: reward = 196.00, steps = 196\n",
      "22:55:47 [DEBUG] train episode 1985: reward = 127.00, steps = 127\n",
      "22:55:47 [DEBUG] train episode 1986: reward = 188.00, steps = 188\n",
      "22:55:47 [DEBUG] train episode 1987: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1988: reward = 175.00, steps = 175\n",
      "22:55:47 [DEBUG] train episode 1989: reward = 200.00, steps = 200\n",
      "22:55:47 [DEBUG] train episode 1990: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1991: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1992: reward = 181.00, steps = 181\n",
      "22:55:48 [DEBUG] train episode 1993: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1994: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1995: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1996: reward = 58.00, steps = 58\n",
      "22:55:48 [DEBUG] train episode 1997: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1998: reward = 200.00, steps = 200\n",
      "22:55:48 [DEBUG] train episode 1999: reward = 35.00, steps = 35\n",
      "22:55:48 [DEBUG] train episode 2000: reward = 29.00, steps = 29\n",
      "22:55:48 [DEBUG] train episode 2001: reward = 124.00, steps = 124\n",
      "22:55:48 [DEBUG] train episode 2002: reward = 170.00, steps = 170\n",
      "22:55:49 [DEBUG] train episode 2003: reward = 140.00, steps = 140\n",
      "22:55:49 [DEBUG] train episode 2004: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2005: reward = 165.00, steps = 165\n",
      "22:55:49 [DEBUG] train episode 2006: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2007: reward = 117.00, steps = 117\n",
      "22:55:49 [DEBUG] train episode 2008: reward = 172.00, steps = 172\n",
      "22:55:49 [DEBUG] train episode 2009: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2010: reward = 200.00, steps = 200\n",
      "22:55:49 [DEBUG] train episode 2011: reward = 183.00, steps = 183\n",
      "22:55:49 [DEBUG] train episode 2012: reward = 145.00, steps = 145\n",
      "22:55:50 [DEBUG] train episode 2013: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2014: reward = 26.00, steps = 26\n",
      "22:55:50 [DEBUG] train episode 2015: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2016: reward = 179.00, steps = 179\n",
      "22:55:50 [DEBUG] train episode 2017: reward = 192.00, steps = 192\n",
      "22:55:50 [DEBUG] train episode 2018: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2019: reward = 175.00, steps = 175\n",
      "22:55:50 [DEBUG] train episode 2020: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2021: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2022: reward = 200.00, steps = 200\n",
      "22:55:50 [DEBUG] train episode 2023: reward = 20.00, steps = 20\n",
      "22:55:51 [DEBUG] train episode 2024: reward = 158.00, steps = 158\n",
      "22:55:51 [DEBUG] train episode 2025: reward = 114.00, steps = 114\n",
      "22:55:51 [DEBUG] train episode 2026: reward = 121.00, steps = 121\n",
      "22:55:51 [DEBUG] train episode 2027: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2028: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2029: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2030: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2031: reward = 198.00, steps = 198\n",
      "22:55:51 [DEBUG] train episode 2032: reward = 200.00, steps = 200\n",
      "22:55:51 [DEBUG] train episode 2033: reward = 129.00, steps = 129\n",
      "22:55:52 [DEBUG] train episode 2034: reward = 195.00, steps = 195\n",
      "22:55:52 [DEBUG] train episode 2035: reward = 67.00, steps = 67\n",
      "22:55:52 [DEBUG] train episode 2036: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2037: reward = 177.00, steps = 177\n",
      "22:55:52 [DEBUG] train episode 2038: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2039: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2040: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2041: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2042: reward = 199.00, steps = 199\n",
      "22:55:52 [DEBUG] train episode 2043: reward = 200.00, steps = 200\n",
      "22:55:52 [DEBUG] train episode 2044: reward = 62.00, steps = 62\n",
      "22:55:52 [DEBUG] train episode 2045: reward = 180.00, steps = 180\n",
      "22:55:53 [DEBUG] train episode 2046: reward = 193.00, steps = 193\n",
      "22:55:53 [DEBUG] train episode 2047: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2048: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2049: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2050: reward = 16.00, steps = 16\n",
      "22:55:53 [DEBUG] train episode 2051: reward = 117.00, steps = 117\n",
      "22:55:53 [DEBUG] train episode 2052: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2053: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2054: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2055: reward = 200.00, steps = 200\n",
      "22:55:53 [DEBUG] train episode 2056: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2057: reward = 138.00, steps = 138\n",
      "22:55:54 [DEBUG] train episode 2058: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2059: reward = 139.00, steps = 139\n",
      "22:55:54 [DEBUG] train episode 2060: reward = 197.00, steps = 197\n",
      "22:55:54 [DEBUG] train episode 2061: reward = 67.00, steps = 67\n",
      "22:55:54 [DEBUG] train episode 2062: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2063: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2064: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2065: reward = 200.00, steps = 200\n",
      "22:55:54 [DEBUG] train episode 2066: reward = 180.00, steps = 180\n",
      "22:55:54 [DEBUG] train episode 2067: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2068: reward = 180.00, steps = 180\n",
      "22:55:55 [DEBUG] train episode 2069: reward = 173.00, steps = 173\n",
      "22:55:55 [DEBUG] train episode 2070: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2071: reward = 167.00, steps = 167\n",
      "22:55:55 [DEBUG] train episode 2072: reward = 198.00, steps = 198\n",
      "22:55:55 [DEBUG] train episode 2073: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2074: reward = 183.00, steps = 183\n",
      "22:55:55 [DEBUG] train episode 2075: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2076: reward = 200.00, steps = 200\n",
      "22:55:55 [DEBUG] train episode 2077: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2078: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2079: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2080: reward = 183.00, steps = 183\n",
      "22:55:56 [DEBUG] train episode 2081: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2082: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2083: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2084: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2085: reward = 178.00, steps = 178\n",
      "22:55:56 [DEBUG] train episode 2086: reward = 200.00, steps = 200\n",
      "22:55:56 [DEBUG] train episode 2087: reward = 182.00, steps = 182\n",
      "22:55:57 [DEBUG] train episode 2088: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2089: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2090: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2091: reward = 193.00, steps = 193\n",
      "22:55:57 [DEBUG] train episode 2092: reward = 176.00, steps = 176\n",
      "22:55:57 [DEBUG] train episode 2093: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2094: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2095: reward = 63.00, steps = 63\n",
      "22:55:57 [DEBUG] train episode 2096: reward = 106.00, steps = 106\n",
      "22:55:57 [DEBUG] train episode 2097: reward = 200.00, steps = 200\n",
      "22:55:57 [DEBUG] train episode 2098: reward = 141.00, steps = 141\n",
      "22:55:58 [DEBUG] train episode 2099: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2100: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2101: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2102: reward = 170.00, steps = 170\n",
      "22:55:58 [DEBUG] train episode 2103: reward = 121.00, steps = 121\n",
      "22:55:58 [DEBUG] train episode 2104: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2105: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2106: reward = 192.00, steps = 192\n",
      "22:55:58 [DEBUG] train episode 2107: reward = 200.00, steps = 200\n",
      "22:55:58 [DEBUG] train episode 2108: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2109: reward = 190.00, steps = 190\n",
      "22:55:59 [DEBUG] train episode 2110: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2111: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2112: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2113: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2114: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2115: reward = 200.00, steps = 200\n",
      "22:55:59 [DEBUG] train episode 2116: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2117: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2118: reward = 114.00, steps = 114\n",
      "22:56:00 [DEBUG] train episode 2119: reward = 199.00, steps = 199\n",
      "22:56:00 [DEBUG] train episode 2120: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2121: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2122: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2123: reward = 151.00, steps = 151\n",
      "22:56:00 [DEBUG] train episode 2124: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2125: reward = 114.00, steps = 114\n",
      "22:56:00 [DEBUG] train episode 2126: reward = 200.00, steps = 200\n",
      "22:56:00 [DEBUG] train episode 2127: reward = 152.00, steps = 152\n",
      "22:56:01 [DEBUG] train episode 2128: reward = 191.00, steps = 191\n",
      "22:56:01 [DEBUG] train episode 2129: reward = 166.00, steps = 166\n",
      "22:56:01 [DEBUG] train episode 2130: reward = 143.00, steps = 143\n",
      "22:56:01 [DEBUG] train episode 2131: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2132: reward = 141.00, steps = 141\n",
      "22:56:01 [DEBUG] train episode 2133: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2134: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2135: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2136: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2137: reward = 200.00, steps = 200\n",
      "22:56:01 [DEBUG] train episode 2138: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2139: reward = 164.00, steps = 164\n",
      "22:56:02 [DEBUG] train episode 2140: reward = 125.00, steps = 125\n",
      "22:56:02 [DEBUG] train episode 2141: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2142: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2143: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2144: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2145: reward = 113.00, steps = 113\n",
      "22:56:02 [DEBUG] train episode 2146: reward = 200.00, steps = 200\n",
      "22:56:02 [DEBUG] train episode 2147: reward = 191.00, steps = 191\n",
      "22:56:02 [DEBUG] train episode 2148: reward = 140.00, steps = 140\n",
      "22:56:02 [DEBUG] train episode 2149: reward = 75.00, steps = 75\n",
      "22:56:02 [DEBUG] train episode 2150: reward = 177.00, steps = 177\n",
      "22:56:03 [DEBUG] train episode 2151: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2152: reward = 187.00, steps = 187\n",
      "22:56:03 [DEBUG] train episode 2153: reward = 68.00, steps = 68\n",
      "22:56:03 [DEBUG] train episode 2154: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2155: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2156: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2157: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2158: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2159: reward = 200.00, steps = 200\n",
      "22:56:03 [DEBUG] train episode 2160: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2161: reward = 172.00, steps = 172\n",
      "22:56:04 [DEBUG] train episode 2162: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2163: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2164: reward = 177.00, steps = 177\n",
      "22:56:04 [DEBUG] train episode 2165: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2166: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2167: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2168: reward = 200.00, steps = 200\n",
      "22:56:04 [DEBUG] train episode 2169: reward = 187.00, steps = 187\n",
      "22:56:05 [DEBUG] train episode 2170: reward = 164.00, steps = 164\n",
      "22:56:05 [DEBUG] train episode 2171: reward = 165.00, steps = 165\n",
      "22:56:05 [DEBUG] train episode 2172: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2173: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2174: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2175: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2176: reward = 80.00, steps = 80\n",
      "22:56:05 [DEBUG] train episode 2177: reward = 200.00, steps = 200\n",
      "22:56:05 [DEBUG] train episode 2178: reward = 172.00, steps = 172\n",
      "22:56:05 [DEBUG] train episode 2179: reward = 94.00, steps = 94\n",
      "22:56:06 [DEBUG] train episode 2180: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2181: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2182: reward = 182.00, steps = 182\n",
      "22:56:06 [DEBUG] train episode 2183: reward = 190.00, steps = 190\n",
      "22:56:06 [DEBUG] train episode 2184: reward = 80.00, steps = 80\n",
      "22:56:06 [DEBUG] train episode 2185: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2186: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2187: reward = 200.00, steps = 200\n",
      "22:56:06 [DEBUG] train episode 2188: reward = 199.00, steps = 199\n",
      "22:56:06 [DEBUG] train episode 2189: reward = 172.00, steps = 172\n",
      "22:56:07 [DEBUG] train episode 2190: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2191: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2192: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2193: reward = 188.00, steps = 188\n",
      "22:56:07 [DEBUG] train episode 2194: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2195: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2196: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2197: reward = 200.00, steps = 200\n",
      "22:56:07 [DEBUG] train episode 2198: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2199: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2200: reward = 190.00, steps = 190\n",
      "22:56:08 [DEBUG] train episode 2201: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2202: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2203: reward = 140.00, steps = 140\n",
      "22:56:08 [DEBUG] train episode 2204: reward = 178.00, steps = 178\n",
      "22:56:08 [DEBUG] train episode 2205: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2206: reward = 200.00, steps = 200\n",
      "22:56:08 [DEBUG] train episode 2207: reward = 145.00, steps = 145\n",
      "22:56:08 [DEBUG] train episode 2208: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2209: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2210: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2211: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2212: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2213: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2214: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2215: reward = 200.00, steps = 200\n",
      "22:56:09 [DEBUG] train episode 2216: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2217: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2218: reward = 126.00, steps = 126\n",
      "22:56:10 [DEBUG] train episode 2219: reward = 192.00, steps = 192\n",
      "22:56:10 [DEBUG] train episode 2220: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2221: reward = 183.00, steps = 183\n",
      "22:56:10 [DEBUG] train episode 2222: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2223: reward = 200.00, steps = 200\n",
      "22:56:10 [DEBUG] train episode 2224: reward = 130.00, steps = 130\n",
      "22:56:11 [DEBUG] train episode 2225: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2226: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2227: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2228: reward = 132.00, steps = 132\n",
      "22:56:11 [DEBUG] train episode 2229: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2230: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2231: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2232: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2233: reward = 200.00, steps = 200\n",
      "22:56:11 [DEBUG] train episode 2234: reward = 27.00, steps = 27\n",
      "22:56:11 [DEBUG] train episode 2235: reward = 148.00, steps = 148\n",
      "22:56:12 [DEBUG] train episode 2236: reward = 142.00, steps = 142\n",
      "22:56:12 [DEBUG] train episode 2237: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2238: reward = 194.00, steps = 194\n",
      "22:56:12 [DEBUG] train episode 2239: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2240: reward = 83.00, steps = 83\n",
      "22:56:12 [DEBUG] train episode 2241: reward = 200.00, steps = 200\n",
      "22:56:12 [DEBUG] train episode 2242: reward = 158.00, steps = 158\n",
      "22:56:12 [DEBUG] train episode 2243: reward = 180.00, steps = 180\n",
      "22:56:12 [DEBUG] train episode 2244: reward = 164.00, steps = 164\n",
      "22:56:12 [DEBUG] train episode 2245: reward = 136.00, steps = 136\n",
      "22:56:13 [DEBUG] train episode 2246: reward = 184.00, steps = 184\n",
      "22:56:13 [DEBUG] train episode 2247: reward = 154.00, steps = 154\n",
      "22:56:13 [DEBUG] train episode 2248: reward = 154.00, steps = 154\n",
      "22:56:13 [DEBUG] train episode 2249: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2250: reward = 190.00, steps = 190\n",
      "22:56:13 [DEBUG] train episode 2251: reward = 150.00, steps = 150\n",
      "22:56:13 [DEBUG] train episode 2252: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2253: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2254: reward = 200.00, steps = 200\n",
      "22:56:13 [DEBUG] train episode 2255: reward = 152.00, steps = 152\n",
      "22:56:14 [DEBUG] train episode 2256: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2257: reward = 16.00, steps = 16\n",
      "22:56:14 [DEBUG] train episode 2258: reward = 178.00, steps = 178\n",
      "22:56:14 [DEBUG] train episode 2259: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2260: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2261: reward = 200.00, steps = 200\n",
      "22:56:14 [DEBUG] train episode 2262: reward = 160.00, steps = 160\n",
      "22:56:14 [DEBUG] train episode 2263: reward = 32.00, steps = 32\n",
      "22:56:14 [DEBUG] train episode 2264: reward = 76.00, steps = 76\n",
      "22:56:14 [DEBUG] train episode 2265: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2266: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2267: reward = 171.00, steps = 171\n",
      "22:56:15 [DEBUG] train episode 2268: reward = 147.00, steps = 147\n",
      "22:56:15 [DEBUG] train episode 2269: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2270: reward = 87.00, steps = 87\n",
      "22:56:15 [DEBUG] train episode 2271: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2272: reward = 185.00, steps = 185\n",
      "22:56:15 [DEBUG] train episode 2273: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2274: reward = 200.00, steps = 200\n",
      "22:56:15 [DEBUG] train episode 2275: reward = 167.00, steps = 167\n",
      "22:56:16 [DEBUG] train episode 2276: reward = 186.00, steps = 186\n",
      "22:56:16 [DEBUG] train episode 2277: reward = 160.00, steps = 160\n",
      "22:56:16 [DEBUG] train episode 2278: reward = 156.00, steps = 156\n",
      "22:56:16 [DEBUG] train episode 2279: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2280: reward = 185.00, steps = 185\n",
      "22:56:16 [DEBUG] train episode 2281: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2282: reward = 147.00, steps = 147\n",
      "22:56:16 [DEBUG] train episode 2283: reward = 200.00, steps = 200\n",
      "22:56:16 [DEBUG] train episode 2284: reward = 132.00, steps = 132\n",
      "22:56:17 [DEBUG] train episode 2285: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2286: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2287: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2288: reward = 124.00, steps = 124\n",
      "22:56:17 [DEBUG] train episode 2289: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2290: reward = 200.00, steps = 200\n",
      "22:56:17 [DEBUG] train episode 2291: reward = 199.00, steps = 199\n",
      "22:56:17 [DEBUG] train episode 2292: reward = 126.00, steps = 126\n",
      "22:56:17 [DEBUG] train episode 2293: reward = 178.00, steps = 178\n",
      "22:56:17 [DEBUG] train episode 2294: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2295: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2296: reward = 182.00, steps = 182\n",
      "22:56:18 [DEBUG] train episode 2297: reward = 182.00, steps = 182\n",
      "22:56:18 [DEBUG] train episode 2298: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2299: reward = 132.00, steps = 132\n",
      "22:56:18 [DEBUG] train episode 2300: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2301: reward = 119.00, steps = 119\n",
      "22:56:18 [DEBUG] train episode 2302: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2303: reward = 200.00, steps = 200\n",
      "22:56:18 [DEBUG] train episode 2304: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2305: reward = 178.00, steps = 178\n",
      "22:56:19 [DEBUG] train episode 2306: reward = 133.00, steps = 133\n",
      "22:56:19 [DEBUG] train episode 2307: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2308: reward = 173.00, steps = 173\n",
      "22:56:19 [DEBUG] train episode 2309: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2310: reward = 200.00, steps = 200\n",
      "22:56:19 [DEBUG] train episode 2311: reward = 163.00, steps = 163\n",
      "22:56:19 [DEBUG] train episode 2312: reward = 191.00, steps = 191\n",
      "22:56:19 [DEBUG] train episode 2313: reward = 169.00, steps = 169\n",
      "22:56:20 [DEBUG] train episode 2314: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2315: reward = 180.00, steps = 180\n",
      "22:56:20 [DEBUG] train episode 2316: reward = 185.00, steps = 185\n",
      "22:56:20 [DEBUG] train episode 2317: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2318: reward = 185.00, steps = 185\n",
      "22:56:20 [DEBUG] train episode 2319: reward = 168.00, steps = 168\n",
      "22:56:20 [DEBUG] train episode 2320: reward = 42.00, steps = 42\n",
      "22:56:20 [DEBUG] train episode 2321: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2322: reward = 127.00, steps = 127\n",
      "22:56:20 [DEBUG] train episode 2323: reward = 200.00, steps = 200\n",
      "22:56:20 [DEBUG] train episode 2324: reward = 178.00, steps = 178\n",
      "22:56:21 [DEBUG] train episode 2325: reward = 156.00, steps = 156\n",
      "22:56:21 [DEBUG] train episode 2326: reward = 142.00, steps = 142\n",
      "22:56:21 [DEBUG] train episode 2327: reward = 75.00, steps = 75\n",
      "22:56:21 [DEBUG] train episode 2328: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2329: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2330: reward = 165.00, steps = 165\n",
      "22:56:21 [DEBUG] train episode 2331: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2332: reward = 195.00, steps = 195\n",
      "22:56:21 [DEBUG] train episode 2333: reward = 200.00, steps = 200\n",
      "22:56:21 [DEBUG] train episode 2334: reward = 177.00, steps = 177\n",
      "22:56:22 [DEBUG] train episode 2335: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2336: reward = 154.00, steps = 154\n",
      "22:56:22 [DEBUG] train episode 2337: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2338: reward = 157.00, steps = 157\n",
      "22:56:22 [DEBUG] train episode 2339: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2340: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2341: reward = 200.00, steps = 200\n",
      "22:56:22 [DEBUG] train episode 2342: reward = 178.00, steps = 178\n",
      "22:56:22 [DEBUG] train episode 2343: reward = 138.00, steps = 138\n",
      "22:56:22 [DEBUG] train episode 2344: reward = 127.00, steps = 127\n",
      "22:56:23 [DEBUG] train episode 2345: reward = 97.00, steps = 97\n",
      "22:56:23 [DEBUG] train episode 2346: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2347: reward = 97.00, steps = 97\n",
      "22:56:23 [DEBUG] train episode 2348: reward = 156.00, steps = 156\n",
      "22:56:23 [DEBUG] train episode 2349: reward = 185.00, steps = 185\n",
      "22:56:23 [DEBUG] train episode 2350: reward = 176.00, steps = 176\n",
      "22:56:23 [DEBUG] train episode 2351: reward = 39.00, steps = 39\n",
      "22:56:23 [DEBUG] train episode 2352: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2353: reward = 157.00, steps = 157\n",
      "22:56:23 [DEBUG] train episode 2354: reward = 200.00, steps = 200\n",
      "22:56:23 [DEBUG] train episode 2355: reward = 183.00, steps = 183\n",
      "22:56:24 [DEBUG] train episode 2356: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2357: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2358: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2359: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2360: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2361: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2362: reward = 120.00, steps = 120\n",
      "22:56:24 [DEBUG] train episode 2363: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2364: reward = 200.00, steps = 200\n",
      "22:56:24 [DEBUG] train episode 2365: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2366: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2367: reward = 141.00, steps = 141\n",
      "22:56:25 [DEBUG] train episode 2368: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2369: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2370: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2371: reward = 200.00, steps = 200\n",
      "22:56:25 [DEBUG] train episode 2372: reward = 187.00, steps = 187\n",
      "22:56:25 [DEBUG] train episode 2373: reward = 188.00, steps = 188\n",
      "22:56:25 [DEBUG] train episode 2374: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2375: reward = 186.00, steps = 186\n",
      "22:56:26 [DEBUG] train episode 2376: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2377: reward = 109.00, steps = 109\n",
      "22:56:26 [DEBUG] train episode 2378: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2379: reward = 164.00, steps = 164\n",
      "22:56:26 [DEBUG] train episode 2380: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2381: reward = 200.00, steps = 200\n",
      "22:56:26 [DEBUG] train episode 2382: reward = 129.00, steps = 129\n",
      "22:56:26 [DEBUG] train episode 2383: reward = 166.00, steps = 166\n",
      "22:56:26 [DEBUG] train episode 2384: reward = 123.00, steps = 123\n",
      "22:56:26 [DEBUG] train episode 2385: reward = 65.00, steps = 65\n",
      "22:56:27 [DEBUG] train episode 2386: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2387: reward = 143.00, steps = 143\n",
      "22:56:27 [DEBUG] train episode 2388: reward = 182.00, steps = 182\n",
      "22:56:27 [DEBUG] train episode 2389: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2390: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2391: reward = 166.00, steps = 166\n",
      "22:56:27 [DEBUG] train episode 2392: reward = 200.00, steps = 200\n",
      "22:56:27 [DEBUG] train episode 2393: reward = 185.00, steps = 185\n",
      "22:56:27 [DEBUG] train episode 2394: reward = 124.00, steps = 124\n",
      "22:56:28 [DEBUG] train episode 2395: reward = 192.00, steps = 192\n",
      "22:56:28 [DEBUG] train episode 2396: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2397: reward = 32.00, steps = 32\n",
      "22:56:28 [DEBUG] train episode 2398: reward = 183.00, steps = 183\n",
      "22:56:28 [DEBUG] train episode 2399: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2400: reward = 81.00, steps = 81\n",
      "22:56:28 [DEBUG] train episode 2401: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2402: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2403: reward = 172.00, steps = 172\n",
      "22:56:28 [DEBUG] train episode 2404: reward = 200.00, steps = 200\n",
      "22:56:28 [DEBUG] train episode 2405: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2406: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2407: reward = 131.00, steps = 131\n",
      "22:56:29 [DEBUG] train episode 2408: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2409: reward = 136.00, steps = 136\n",
      "22:56:29 [DEBUG] train episode 2410: reward = 144.00, steps = 144\n",
      "22:56:29 [DEBUG] train episode 2411: reward = 125.00, steps = 125\n",
      "22:56:29 [DEBUG] train episode 2412: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2413: reward = 200.00, steps = 200\n",
      "22:56:29 [DEBUG] train episode 2414: reward = 138.00, steps = 138\n",
      "22:56:29 [DEBUG] train episode 2415: reward = 65.00, steps = 65\n",
      "22:56:30 [DEBUG] train episode 2416: reward = 193.00, steps = 193\n",
      "22:56:30 [DEBUG] train episode 2417: reward = 148.00, steps = 148\n",
      "22:56:30 [DEBUG] train episode 2418: reward = 187.00, steps = 187\n",
      "22:56:30 [DEBUG] train episode 2419: reward = 200.00, steps = 200\n",
      "22:56:30 [DEBUG] train episode 2420: reward = 160.00, steps = 160\n",
      "22:56:30 [DEBUG] train episode 2421: reward = 161.00, steps = 161\n",
      "22:56:30 [DEBUG] train episode 2422: reward = 124.00, steps = 124\n",
      "22:56:30 [DEBUG] train episode 2423: reward = 199.00, steps = 199\n",
      "22:56:30 [DEBUG] train episode 2424: reward = 187.00, steps = 187\n",
      "22:56:30 [DEBUG] train episode 2425: reward = 134.00, steps = 134\n",
      "22:56:31 [DEBUG] train episode 2426: reward = 121.00, steps = 121\n",
      "22:56:31 [DEBUG] train episode 2427: reward = 111.00, steps = 111\n",
      "22:56:31 [DEBUG] train episode 2428: reward = 169.00, steps = 169\n",
      "22:56:31 [DEBUG] train episode 2429: reward = 99.00, steps = 99\n",
      "22:56:31 [DEBUG] train episode 2430: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2431: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2432: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2433: reward = 152.00, steps = 152\n",
      "22:56:31 [DEBUG] train episode 2434: reward = 200.00, steps = 200\n",
      "22:56:31 [DEBUG] train episode 2435: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2436: reward = 189.00, steps = 189\n",
      "22:56:32 [DEBUG] train episode 2437: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2438: reward = 161.00, steps = 161\n",
      "22:56:32 [DEBUG] train episode 2439: reward = 189.00, steps = 189\n",
      "22:56:32 [DEBUG] train episode 2440: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2441: reward = 167.00, steps = 167\n",
      "22:56:32 [DEBUG] train episode 2442: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2443: reward = 200.00, steps = 200\n",
      "22:56:32 [DEBUG] train episode 2444: reward = 129.00, steps = 129\n",
      "22:56:32 [DEBUG] train episode 2445: reward = 144.00, steps = 144\n",
      "22:56:33 [DEBUG] train episode 2446: reward = 197.00, steps = 197\n",
      "22:56:33 [DEBUG] train episode 2447: reward = 181.00, steps = 181\n",
      "22:56:33 [DEBUG] train episode 2448: reward = 160.00, steps = 160\n",
      "22:56:33 [DEBUG] train episode 2449: reward = 108.00, steps = 108\n",
      "22:56:33 [DEBUG] train episode 2450: reward = 155.00, steps = 155\n",
      "22:56:33 [DEBUG] train episode 2451: reward = 186.00, steps = 186\n",
      "22:56:33 [DEBUG] train episode 2452: reward = 68.00, steps = 68\n",
      "22:56:33 [DEBUG] train episode 2453: reward = 137.00, steps = 137\n",
      "22:56:33 [DEBUG] train episode 2454: reward = 137.00, steps = 137\n",
      "22:56:33 [DEBUG] train episode 2455: reward = 200.00, steps = 200\n",
      "22:56:33 [DEBUG] train episode 2456: reward = 142.00, steps = 142\n",
      "22:56:33 [DEBUG] train episode 2457: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2458: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2459: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2460: reward = 169.00, steps = 169\n",
      "22:56:34 [DEBUG] train episode 2461: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2462: reward = 179.00, steps = 179\n",
      "22:56:34 [DEBUG] train episode 2463: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2464: reward = 196.00, steps = 196\n",
      "22:56:34 [DEBUG] train episode 2465: reward = 112.00, steps = 112\n",
      "22:56:34 [DEBUG] train episode 2466: reward = 200.00, steps = 200\n",
      "22:56:34 [DEBUG] train episode 2467: reward = 145.00, steps = 145\n",
      "22:56:34 [DEBUG] train episode 2468: reward = 129.00, steps = 129\n",
      "22:56:35 [DEBUG] train episode 2469: reward = 130.00, steps = 130\n",
      "22:56:35 [DEBUG] train episode 2470: reward = 157.00, steps = 157\n",
      "22:56:35 [DEBUG] train episode 2471: reward = 165.00, steps = 165\n",
      "22:56:35 [DEBUG] train episode 2472: reward = 158.00, steps = 158\n",
      "22:56:35 [DEBUG] train episode 2473: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2474: reward = 187.00, steps = 187\n",
      "22:56:35 [DEBUG] train episode 2475: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2476: reward = 200.00, steps = 200\n",
      "22:56:35 [DEBUG] train episode 2477: reward = 168.00, steps = 168\n",
      "22:56:36 [DEBUG] train episode 2478: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2479: reward = 148.00, steps = 148\n",
      "22:56:36 [DEBUG] train episode 2480: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2481: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2482: reward = 196.00, steps = 196\n",
      "22:56:36 [DEBUG] train episode 2483: reward = 183.00, steps = 183\n",
      "22:56:36 [DEBUG] train episode 2484: reward = 163.00, steps = 163\n",
      "22:56:36 [DEBUG] train episode 2485: reward = 200.00, steps = 200\n",
      "22:56:36 [DEBUG] train episode 2486: reward = 198.00, steps = 198\n",
      "22:56:36 [DEBUG] train episode 2487: reward = 155.00, steps = 155\n",
      "22:56:36 [DEBUG] train episode 2488: reward = 156.00, steps = 156\n",
      "22:56:36 [DEBUG] train episode 2489: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2490: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2491: reward = 142.00, steps = 142\n",
      "22:56:37 [DEBUG] train episode 2492: reward = 155.00, steps = 155\n",
      "22:56:37 [DEBUG] train episode 2493: reward = 146.00, steps = 146\n",
      "22:56:37 [DEBUG] train episode 2494: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2495: reward = 182.00, steps = 182\n",
      "22:56:37 [DEBUG] train episode 2496: reward = 139.00, steps = 139\n",
      "22:56:37 [DEBUG] train episode 2497: reward = 200.00, steps = 200\n",
      "22:56:37 [DEBUG] train episode 2498: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2499: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2500: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2501: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2502: reward = 192.00, steps = 192\n",
      "22:56:38 [DEBUG] train episode 2503: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2504: reward = 200.00, steps = 200\n",
      "22:56:38 [DEBUG] train episode 2505: reward = 190.00, steps = 190\n",
      "22:56:38 [DEBUG] train episode 2506: reward = 156.00, steps = 156\n",
      "22:56:39 [DEBUG] train episode 2507: reward = 136.00, steps = 136\n",
      "22:56:39 [DEBUG] train episode 2508: reward = 180.00, steps = 180\n",
      "22:56:39 [DEBUG] train episode 2509: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2510: reward = 148.00, steps = 148\n",
      "22:56:39 [DEBUG] train episode 2511: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2512: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2513: reward = 200.00, steps = 200\n",
      "22:56:39 [DEBUG] train episode 2514: reward = 174.00, steps = 174\n",
      "22:56:39 [DEBUG] train episode 2515: reward = 166.00, steps = 166\n",
      "22:56:40 [DEBUG] train episode 2516: reward = 196.00, steps = 196\n",
      "22:56:40 [DEBUG] train episode 2517: reward = 127.00, steps = 127\n",
      "22:56:40 [DEBUG] train episode 2518: reward = 187.00, steps = 187\n",
      "22:56:40 [DEBUG] train episode 2519: reward = 73.00, steps = 73\n",
      "22:56:40 [DEBUG] train episode 2520: reward = 169.00, steps = 169\n",
      "22:56:40 [DEBUG] train episode 2521: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2522: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2523: reward = 200.00, steps = 200\n",
      "22:56:40 [DEBUG] train episode 2524: reward = 156.00, steps = 156\n",
      "22:56:41 [DEBUG] train episode 2525: reward = 179.00, steps = 179\n",
      "22:56:41 [DEBUG] train episode 2526: reward = 200.00, steps = 200\n",
      "22:56:41 [DEBUG] train episode 2527: reward = 126.00, steps = 126\n",
      "22:56:41 [DEBUG] train episode 2528: reward = 142.00, steps = 142\n",
      "22:56:41 [DEBUG] train episode 2529: reward = 197.00, steps = 197\n",
      "22:56:41 [DEBUG] train episode 2530: reward = 161.00, steps = 161\n",
      "22:56:41 [DEBUG] train episode 2531: reward = 187.00, steps = 187\n",
      "22:56:41 [DEBUG] train episode 2532: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2533: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2534: reward = 181.00, steps = 181\n",
      "22:56:42 [DEBUG] train episode 2535: reward = 175.00, steps = 175\n",
      "22:56:42 [DEBUG] train episode 2536: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2537: reward = 76.00, steps = 76\n",
      "22:56:42 [DEBUG] train episode 2538: reward = 192.00, steps = 192\n",
      "22:56:42 [DEBUG] train episode 2539: reward = 118.00, steps = 118\n",
      "22:56:42 [DEBUG] train episode 2540: reward = 200.00, steps = 200\n",
      "22:56:42 [DEBUG] train episode 2541: reward = 168.00, steps = 168\n",
      "22:56:43 [DEBUG] train episode 2542: reward = 178.00, steps = 178\n",
      "22:56:43 [DEBUG] train episode 2543: reward = 172.00, steps = 172\n",
      "22:56:43 [DEBUG] train episode 2544: reward = 138.00, steps = 138\n",
      "22:56:43 [DEBUG] train episode 2545: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2546: reward = 176.00, steps = 176\n",
      "22:56:43 [DEBUG] train episode 2547: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2548: reward = 200.00, steps = 200\n",
      "22:56:43 [DEBUG] train episode 2549: reward = 162.00, steps = 162\n",
      "22:56:44 [DEBUG] train episode 2550: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2551: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2552: reward = 189.00, steps = 189\n",
      "22:56:44 [DEBUG] train episode 2553: reward = 157.00, steps = 157\n",
      "22:56:44 [DEBUG] train episode 2554: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2555: reward = 200.00, steps = 200\n",
      "22:56:44 [DEBUG] train episode 2556: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2557: reward = 187.00, steps = 187\n",
      "22:56:45 [DEBUG] train episode 2558: reward = 156.00, steps = 156\n",
      "22:56:45 [DEBUG] train episode 2559: reward = 130.00, steps = 130\n",
      "22:56:45 [DEBUG] train episode 2560: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2561: reward = 40.00, steps = 40\n",
      "22:56:45 [DEBUG] train episode 2562: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2563: reward = 194.00, steps = 194\n",
      "22:56:45 [DEBUG] train episode 2564: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2565: reward = 200.00, steps = 200\n",
      "22:56:45 [DEBUG] train episode 2566: reward = 173.00, steps = 173\n",
      "22:56:46 [DEBUG] train episode 2567: reward = 149.00, steps = 149\n",
      "22:56:46 [DEBUG] train episode 2568: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2569: reward = 136.00, steps = 136\n",
      "22:56:46 [DEBUG] train episode 2570: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2571: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2572: reward = 153.00, steps = 153\n",
      "22:56:46 [DEBUG] train episode 2573: reward = 165.00, steps = 165\n",
      "22:56:46 [DEBUG] train episode 2574: reward = 200.00, steps = 200\n",
      "22:56:46 [DEBUG] train episode 2575: reward = 200.00, steps = 200\n",
      "22:56:47 [DEBUG] train episode 2576: reward = 179.00, steps = 179\n",
      "22:56:47 [DEBUG] train episode 2577: reward = 176.00, steps = 176\n",
      "22:56:47 [DEBUG] train episode 2578: reward = 180.00, steps = 180\n",
      "22:56:47 [DEBUG] train episode 2579: reward = 119.00, steps = 119\n",
      "22:56:47 [DEBUG] train episode 2580: reward = 123.00, steps = 123\n",
      "22:56:47 [DEBUG] train episode 2581: reward = 186.00, steps = 186\n",
      "22:56:47 [DEBUG] train episode 2582: reward = 200.00, steps = 200\n",
      "22:56:47 [DEBUG] train episode 2583: reward = 180.00, steps = 180\n",
      "22:56:47 [DEBUG] train episode 2584: reward = 150.00, steps = 150\n",
      "22:56:47 [DEBUG] train episode 2585: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2586: reward = 158.00, steps = 158\n",
      "22:56:48 [DEBUG] train episode 2587: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2588: reward = 180.00, steps = 180\n",
      "22:56:48 [DEBUG] train episode 2589: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2590: reward = 200.00, steps = 200\n",
      "22:56:48 [DEBUG] train episode 2591: reward = 189.00, steps = 189\n",
      "22:56:48 [DEBUG] train episode 2592: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2593: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2594: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2595: reward = 200.00, steps = 200\n",
      "22:56:49 [DEBUG] train episode 2596: reward = 146.00, steps = 146\n",
      "22:56:49 [DEBUG] train episode 2597: reward = 177.00, steps = 177\n",
      "22:56:49 [DEBUG] train episode 2598: reward = 179.00, steps = 179\n",
      "22:56:49 [DEBUG] train episode 2599: reward = 136.00, steps = 136\n",
      "22:56:49 [DEBUG] train episode 2600: reward = 84.00, steps = 84\n",
      "22:56:49 [DEBUG] train episode 2601: reward = 148.00, steps = 148\n",
      "22:56:49 [DEBUG] train episode 2602: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2603: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2604: reward = 133.00, steps = 133\n",
      "22:56:50 [DEBUG] train episode 2605: reward = 167.00, steps = 167\n",
      "22:56:50 [DEBUG] train episode 2606: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2607: reward = 130.00, steps = 130\n",
      "22:56:50 [DEBUG] train episode 2608: reward = 200.00, steps = 200\n",
      "22:56:50 [DEBUG] train episode 2609: reward = 146.00, steps = 146\n",
      "22:56:50 [DEBUG] train episode 2610: reward = 190.00, steps = 190\n",
      "22:56:50 [DEBUG] train episode 2611: reward = 174.00, steps = 174\n",
      "22:56:51 [DEBUG] train episode 2612: reward = 127.00, steps = 127\n",
      "22:56:51 [DEBUG] train episode 2613: reward = 188.00, steps = 188\n",
      "22:56:51 [DEBUG] train episode 2614: reward = 200.00, steps = 200\n",
      "22:56:51 [DEBUG] train episode 2615: reward = 146.00, steps = 146\n",
      "22:56:51 [DEBUG] train episode 2616: reward = 192.00, steps = 192\n",
      "22:56:51 [DEBUG] train episode 2617: reward = 200.00, steps = 200\n",
      "22:56:51 [DEBUG] train episode 2618: reward = 195.00, steps = 195\n",
      "22:56:51 [DEBUG] train episode 2619: reward = 192.00, steps = 192\n",
      "22:56:51 [DEBUG] train episode 2620: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2621: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2622: reward = 170.00, steps = 170\n",
      "22:56:52 [DEBUG] train episode 2623: reward = 178.00, steps = 178\n",
      "22:56:52 [DEBUG] train episode 2624: reward = 172.00, steps = 172\n",
      "22:56:52 [DEBUG] train episode 2625: reward = 165.00, steps = 165\n",
      "22:56:52 [DEBUG] train episode 2626: reward = 131.00, steps = 131\n",
      "22:56:52 [DEBUG] train episode 2627: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2628: reward = 200.00, steps = 200\n",
      "22:56:52 [DEBUG] train episode 2629: reward = 183.00, steps = 183\n",
      "22:56:53 [DEBUG] train episode 2630: reward = 144.00, steps = 144\n",
      "22:56:53 [DEBUG] train episode 2631: reward = 114.00, steps = 114\n",
      "22:56:53 [DEBUG] train episode 2632: reward = 200.00, steps = 200\n",
      "22:56:53 [DEBUG] train episode 2633: reward = 169.00, steps = 169\n",
      "22:56:53 [DEBUG] train episode 2634: reward = 146.00, steps = 146\n",
      "22:56:53 [DEBUG] train episode 2635: reward = 128.00, steps = 128\n",
      "22:56:53 [DEBUG] train episode 2636: reward = 200.00, steps = 200\n",
      "22:56:53 [DEBUG] train episode 2637: reward = 67.00, steps = 67\n",
      "22:56:53 [DEBUG] train episode 2638: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2639: reward = 158.00, steps = 158\n",
      "22:56:54 [DEBUG] train episode 2640: reward = 177.00, steps = 177\n",
      "22:56:54 [DEBUG] train episode 2641: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2642: reward = 151.00, steps = 151\n",
      "22:56:54 [DEBUG] train episode 2643: reward = 59.00, steps = 59\n",
      "22:56:54 [DEBUG] train episode 2644: reward = 177.00, steps = 177\n",
      "22:56:54 [DEBUG] train episode 2645: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2646: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2647: reward = 200.00, steps = 200\n",
      "22:56:54 [DEBUG] train episode 2648: reward = 132.00, steps = 132\n",
      "22:56:55 [DEBUG] train episode 2649: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2650: reward = 178.00, steps = 178\n",
      "22:56:55 [DEBUG] train episode 2651: reward = 149.00, steps = 149\n",
      "22:56:55 [DEBUG] train episode 2652: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2653: reward = 187.00, steps = 187\n",
      "22:56:55 [DEBUG] train episode 2654: reward = 179.00, steps = 179\n",
      "22:56:55 [DEBUG] train episode 2655: reward = 162.00, steps = 162\n",
      "22:56:55 [DEBUG] train episode 2656: reward = 200.00, steps = 200\n",
      "22:56:55 [DEBUG] train episode 2657: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2658: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2659: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2660: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2661: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2662: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2663: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2664: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2665: reward = 200.00, steps = 200\n",
      "22:56:56 [DEBUG] train episode 2666: reward = 149.00, steps = 149\n",
      "22:56:57 [DEBUG] train episode 2667: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2668: reward = 150.00, steps = 150\n",
      "22:56:57 [DEBUG] train episode 2669: reward = 176.00, steps = 176\n",
      "22:56:57 [DEBUG] train episode 2670: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2671: reward = 158.00, steps = 158\n",
      "22:56:57 [DEBUG] train episode 2672: reward = 188.00, steps = 188\n",
      "22:56:57 [DEBUG] train episode 2673: reward = 200.00, steps = 200\n",
      "22:56:57 [DEBUG] train episode 2674: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2675: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2676: reward = 167.00, steps = 167\n",
      "22:56:58 [DEBUG] train episode 2677: reward = 184.00, steps = 184\n",
      "22:56:58 [DEBUG] train episode 2678: reward = 200.00, steps = 200\n",
      "22:56:58 [DEBUG] train episode 2679: reward = 170.00, steps = 170\n",
      "22:56:58 [DEBUG] train episode 2680: reward = 135.00, steps = 135\n",
      "22:56:58 [DEBUG] train episode 2681: reward = 143.00, steps = 143\n",
      "22:56:58 [DEBUG] train episode 2682: reward = 169.00, steps = 169\n",
      "22:56:59 [DEBUG] train episode 2683: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2684: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2685: reward = 163.00, steps = 163\n",
      "22:56:59 [DEBUG] train episode 2686: reward = 195.00, steps = 195\n",
      "22:56:59 [DEBUG] train episode 2687: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2688: reward = 200.00, steps = 200\n",
      "22:56:59 [DEBUG] train episode 2689: reward = 152.00, steps = 152\n",
      "22:56:59 [DEBUG] train episode 2690: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2691: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2692: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2693: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2694: reward = 135.00, steps = 135\n",
      "22:57:00 [DEBUG] train episode 2695: reward = 200.00, steps = 200\n",
      "22:57:00 [DEBUG] train episode 2696: reward = 122.00, steps = 122\n",
      "22:57:00 [DEBUG] train episode 2697: reward = 155.00, steps = 155\n",
      "22:57:00 [DEBUG] train episode 2698: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2699: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2700: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2701: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2702: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2703: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2704: reward = 184.00, steps = 184\n",
      "22:57:01 [DEBUG] train episode 2705: reward = 200.00, steps = 200\n",
      "22:57:01 [DEBUG] train episode 2706: reward = 164.00, steps = 164\n",
      "22:57:02 [DEBUG] train episode 2707: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2708: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2709: reward = 154.00, steps = 154\n",
      "22:57:02 [DEBUG] train episode 2710: reward = 173.00, steps = 173\n",
      "22:57:02 [DEBUG] train episode 2711: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2712: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2713: reward = 200.00, steps = 200\n",
      "22:57:02 [DEBUG] train episode 2714: reward = 171.00, steps = 171\n",
      "22:57:03 [DEBUG] train episode 2715: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2716: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2717: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2718: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2719: reward = 188.00, steps = 188\n",
      "22:57:03 [DEBUG] train episode 2720: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2721: reward = 152.00, steps = 152\n",
      "22:57:03 [DEBUG] train episode 2722: reward = 200.00, steps = 200\n",
      "22:57:03 [DEBUG] train episode 2723: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2724: reward = 170.00, steps = 170\n",
      "22:57:04 [DEBUG] train episode 2725: reward = 154.00, steps = 154\n",
      "22:57:04 [DEBUG] train episode 2726: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2727: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2728: reward = 159.00, steps = 159\n",
      "22:57:04 [DEBUG] train episode 2729: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2730: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2731: reward = 200.00, steps = 200\n",
      "22:57:04 [DEBUG] train episode 2732: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2733: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2734: reward = 153.00, steps = 153\n",
      "22:57:05 [DEBUG] train episode 2735: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2736: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2737: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2738: reward = 186.00, steps = 186\n",
      "22:57:05 [DEBUG] train episode 2739: reward = 200.00, steps = 200\n",
      "22:57:05 [DEBUG] train episode 2740: reward = 119.00, steps = 119\n",
      "22:57:05 [DEBUG] train episode 2741: reward = 178.00, steps = 178\n",
      "22:57:06 [DEBUG] train episode 2742: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2743: reward = 185.00, steps = 185\n",
      "22:57:06 [DEBUG] train episode 2744: reward = 105.00, steps = 105\n",
      "22:57:06 [DEBUG] train episode 2745: reward = 171.00, steps = 171\n",
      "22:57:06 [DEBUG] train episode 2746: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2747: reward = 200.00, steps = 200\n",
      "22:57:06 [DEBUG] train episode 2748: reward = 194.00, steps = 194\n",
      "22:57:06 [DEBUG] train episode 2749: reward = 111.00, steps = 111\n",
      "22:57:06 [DEBUG] train episode 2750: reward = 153.00, steps = 153\n",
      "22:57:06 [DEBUG] train episode 2751: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2752: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2753: reward = 185.00, steps = 185\n",
      "22:57:07 [DEBUG] train episode 2754: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2755: reward = 181.00, steps = 181\n",
      "22:57:07 [DEBUG] train episode 2756: reward = 68.00, steps = 68\n",
      "22:57:07 [DEBUG] train episode 2757: reward = 185.00, steps = 185\n",
      "22:57:07 [DEBUG] train episode 2758: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2759: reward = 200.00, steps = 200\n",
      "22:57:07 [DEBUG] train episode 2760: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2761: reward = 191.00, steps = 191\n",
      "22:57:08 [DEBUG] train episode 2762: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2763: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2764: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2765: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2766: reward = 176.00, steps = 176\n",
      "22:57:08 [DEBUG] train episode 2767: reward = 200.00, steps = 200\n",
      "22:57:08 [DEBUG] train episode 2768: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2769: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2770: reward = 167.00, steps = 167\n",
      "22:57:09 [DEBUG] train episode 2771: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2772: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2773: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2774: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2775: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2776: reward = 200.00, steps = 200\n",
      "22:57:09 [DEBUG] train episode 2777: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2778: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2779: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2780: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2781: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2782: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2783: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2784: reward = 200.00, steps = 200\n",
      "22:57:10 [DEBUG] train episode 2785: reward = 112.00, steps = 112\n",
      "22:57:10 [DEBUG] train episode 2786: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2787: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2788: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2789: reward = 143.00, steps = 143\n",
      "22:57:11 [DEBUG] train episode 2790: reward = 173.00, steps = 173\n",
      "22:57:11 [DEBUG] train episode 2791: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2792: reward = 200.00, steps = 200\n",
      "22:57:11 [DEBUG] train episode 2793: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2794: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2795: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2796: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2797: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2798: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2799: reward = 200.00, steps = 200\n",
      "22:57:12 [DEBUG] train episode 2800: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2801: reward = 163.00, steps = 163\n",
      "22:57:13 [DEBUG] train episode 2802: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2803: reward = 164.00, steps = 164\n",
      "22:57:13 [DEBUG] train episode 2804: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2805: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2806: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2807: reward = 134.00, steps = 134\n",
      "22:57:13 [DEBUG] train episode 2808: reward = 200.00, steps = 200\n",
      "22:57:13 [DEBUG] train episode 2809: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2810: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2811: reward = 43.00, steps = 43\n",
      "22:57:14 [DEBUG] train episode 2812: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2813: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2814: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2815: reward = 33.00, steps = 33\n",
      "22:57:14 [DEBUG] train episode 2816: reward = 200.00, steps = 200\n",
      "22:57:14 [DEBUG] train episode 2817: reward = 172.00, steps = 172\n",
      "22:57:14 [DEBUG] train episode 2818: reward = 187.00, steps = 187\n",
      "22:57:14 [DEBUG] train episode 2819: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2820: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2821: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2822: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2823: reward = 198.00, steps = 198\n",
      "22:57:15 [DEBUG] train episode 2824: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2825: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2826: reward = 149.00, steps = 149\n",
      "22:57:15 [DEBUG] train episode 2827: reward = 200.00, steps = 200\n",
      "22:57:15 [DEBUG] train episode 2828: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2829: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2830: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2831: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2832: reward = 122.00, steps = 122\n",
      "22:57:16 [DEBUG] train episode 2833: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2834: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2835: reward = 200.00, steps = 200\n",
      "22:57:16 [DEBUG] train episode 2836: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2837: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2838: reward = 81.00, steps = 81\n",
      "22:57:17 [DEBUG] train episode 2839: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2840: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2841: reward = 141.00, steps = 141\n",
      "22:57:17 [DEBUG] train episode 2842: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2843: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2844: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2845: reward = 200.00, steps = 200\n",
      "22:57:17 [DEBUG] train episode 2846: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2847: reward = 145.00, steps = 145\n",
      "22:57:18 [DEBUG] train episode 2848: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2849: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2850: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2851: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2852: reward = 200.00, steps = 200\n",
      "22:57:18 [DEBUG] train episode 2853: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2854: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2855: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2856: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2857: reward = 149.00, steps = 149\n",
      "22:57:19 [DEBUG] train episode 2858: reward = 162.00, steps = 162\n",
      "22:57:19 [DEBUG] train episode 2859: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2860: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2861: reward = 200.00, steps = 200\n",
      "22:57:19 [DEBUG] train episode 2862: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2863: reward = 121.00, steps = 121\n",
      "22:57:20 [DEBUG] train episode 2864: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2865: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2866: reward = 139.00, steps = 139\n",
      "22:57:20 [DEBUG] train episode 2867: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2868: reward = 200.00, steps = 200\n",
      "22:57:20 [DEBUG] train episode 2869: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2870: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2871: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2872: reward = 183.00, steps = 183\n",
      "22:57:21 [DEBUG] train episode 2873: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2874: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2875: reward = 187.00, steps = 187\n",
      "22:57:21 [DEBUG] train episode 2876: reward = 29.00, steps = 29\n",
      "22:57:21 [DEBUG] train episode 2877: reward = 200.00, steps = 200\n",
      "22:57:21 [DEBUG] train episode 2878: reward = 98.00, steps = 98\n",
      "22:57:21 [DEBUG] train episode 2879: reward = 130.00, steps = 130\n",
      "22:57:22 [DEBUG] train episode 2880: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2881: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2882: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2883: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2884: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2885: reward = 181.00, steps = 181\n",
      "22:57:22 [DEBUG] train episode 2886: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2887: reward = 200.00, steps = 200\n",
      "22:57:22 [DEBUG] train episode 2888: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2889: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2890: reward = 117.00, steps = 117\n",
      "22:57:23 [DEBUG] train episode 2891: reward = 182.00, steps = 182\n",
      "22:57:23 [DEBUG] train episode 2892: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2893: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2894: reward = 113.00, steps = 113\n",
      "22:57:23 [DEBUG] train episode 2895: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2896: reward = 200.00, steps = 200\n",
      "22:57:23 [DEBUG] train episode 2897: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2898: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2899: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2900: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2901: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2902: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2903: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2904: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2905: reward = 200.00, steps = 200\n",
      "22:57:24 [DEBUG] train episode 2906: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2907: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2908: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2909: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2910: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2911: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2912: reward = 194.00, steps = 194\n",
      "22:57:25 [DEBUG] train episode 2913: reward = 200.00, steps = 200\n",
      "22:57:25 [DEBUG] train episode 2914: reward = 200.00, steps = 200\n",
      "22:57:25 [INFO] ==== test ====\n",
      "22:57:26 [DEBUG] test episode 0: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 1: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 2: reward = 163.00, steps = 163\n",
      "22:57:26 [DEBUG] test episode 3: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 4: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 5: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 6: reward = 200.00, steps = 200\n",
      "22:57:26 [DEBUG] test episode 7: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 8: reward = 197.00, steps = 197\n",
      "22:57:27 [DEBUG] test episode 9: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 10: reward = 113.00, steps = 113\n",
      "22:57:27 [DEBUG] test episode 11: reward = 29.00, steps = 29\n",
      "22:57:27 [DEBUG] test episode 12: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 13: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 14: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 15: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 16: reward = 200.00, steps = 200\n",
      "22:57:27 [DEBUG] test episode 17: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 18: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 19: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 20: reward = 159.00, steps = 159\n",
      "22:57:28 [DEBUG] test episode 21: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 22: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 23: reward = 54.00, steps = 54\n",
      "22:57:28 [DEBUG] test episode 24: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 25: reward = 200.00, steps = 200\n",
      "22:57:28 [DEBUG] test episode 26: reward = 132.00, steps = 132\n",
      "22:57:28 [DEBUG] test episode 27: reward = 118.00, steps = 118\n",
      "22:57:28 [DEBUG] test episode 28: reward = 196.00, steps = 196\n",
      "22:57:29 [DEBUG] test episode 29: reward = 199.00, steps = 199\n",
      "22:57:29 [DEBUG] test episode 30: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 31: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 32: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 33: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 34: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 35: reward = 200.00, steps = 200\n",
      "22:57:29 [DEBUG] test episode 36: reward = 187.00, steps = 187\n",
      "22:57:29 [DEBUG] test episode 37: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 38: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 39: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 40: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 41: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 42: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 43: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 44: reward = 200.00, steps = 200\n",
      "22:57:30 [DEBUG] test episode 45: reward = 165.00, steps = 165\n",
      "22:57:30 [DEBUG] test episode 46: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 47: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 48: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 49: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 50: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 51: reward = 191.00, steps = 191\n",
      "22:57:31 [DEBUG] test episode 52: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 53: reward = 200.00, steps = 200\n",
      "22:57:31 [DEBUG] test episode 54: reward = 125.00, steps = 125\n",
      "22:57:31 [DEBUG] test episode 55: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 56: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 57: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 58: reward = 114.00, steps = 114\n",
      "22:57:32 [DEBUG] test episode 59: reward = 120.00, steps = 120\n",
      "22:57:32 [DEBUG] test episode 60: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 61: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 62: reward = 200.00, steps = 200\n",
      "22:57:32 [DEBUG] test episode 63: reward = 122.00, steps = 122\n",
      "22:57:32 [DEBUG] test episode 64: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 65: reward = 137.00, steps = 137\n",
      "22:57:33 [DEBUG] test episode 66: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 67: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 68: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 69: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 70: reward = 200.00, steps = 200\n",
      "22:57:33 [DEBUG] test episode 71: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 72: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 73: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 74: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 75: reward = 178.00, steps = 178\n",
      "22:57:34 [DEBUG] test episode 76: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 77: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 78: reward = 200.00, steps = 200\n",
      "22:57:34 [DEBUG] test episode 79: reward = 170.00, steps = 170\n",
      "22:57:34 [DEBUG] test episode 80: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 81: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 82: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 83: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 84: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 85: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 86: reward = 167.00, steps = 167\n",
      "22:57:35 [DEBUG] test episode 87: reward = 110.00, steps = 110\n",
      "22:57:35 [DEBUG] test episode 88: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 89: reward = 200.00, steps = 200\n",
      "22:57:35 [DEBUG] test episode 90: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 91: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 92: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 93: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 94: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 95: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 96: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 97: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 98: reward = 200.00, steps = 200\n",
      "22:57:36 [DEBUG] test episode 99: reward = 200.00, steps = 200\n",
      "22:57:36 [INFO] average episode reward = 187.46 ± 31.36\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2deZgU1bn/v2/39OwbszDAMMMwwww7DDCyKgjIJkZAo6KJmmgkJhoTNQuuMXH5cRM1uYnR/Eg06r0uSVyiNyYqkms0cWURZN9EZBFGdgSBmXnvH1099PRUddfaVdX9fp5nnuk+XafqPbV8z6n3nPMeYmYIgiAI6UHAbQMEQRCE5CGiLwiCkEaI6AuCIKQRIvqCIAhphIi+IAhCGpHhtgEAUFZWxjU1NW6bIQiC4CuWLl36GTOXG8njCdGvqanBkiVL3DZDEATBVxDRx0bziHtHEAQhjRDRFwRBSCNE9AVBENIIEX1BEIQ0QkRfEAQhjUgo+kRURUT/S0RriWg1EX1XSS8hokVEtFH53yUqz01EtImI1hPRNCcLIAiCIOhHT0u/BcCNzNwfwGgA1xDRAADzASxm5noAi5XvUH6bC2AggOkAHiSioBPGC4IgCMZIOE6fmXcB2KV8PkxEawFUApgF4Exls8cAvA7gR0r608x8HMBHRLQJwEgAb9ttvBfZvv8oNu45gol9u6r+/srqTzG8ugvW7DqE2rI8VJXktv/2xoZmPPCPTfhSYw+M6l2ChooCAMCxE63424e7cN7wSgy+41UQAYe/aMHMwd3xy7mNCAUD2PrZ5zjvobcQChJqy/Kx9tNDKMnNxLmNPVCUE8L7W/dh75ET+M6keuw7egIPvb4ZoSBhYt+uWLXjII63tGHHgWMYVl2MV1Z9is9PtOL+C4eipZVBBNz+wmocO9naoSwja0rwwFeG4bwH38LeIydwvKUVbUqk7hG9umDVjoMYW1eKZdsOYMqACjyzdDvmz+iHnQeO4R/r9qCpVxfUlOXhj+9/gkGVRVj28f72Y9w1exAaKgrw07+uwXsf7cOUARXICBCyMgJYtu0AHvrqcDzx7jZ0yQ1h5faDKM3LxKqdh3Dw2EmMri3FzgPHsPTj/QCAgT0KsXrnIUzsW45l2w7g4LGTqtfm9D5lyA4F8NraPagtz8OW5s8BAOUFWWg+fByDK4tw9uDueOnDnVi141CHvNmhAAJEyM3MwGdHjiM/KwOja0vw2to97dsU5YQ6HXt2Yw/85YOdAIDRtSXYsPsI9h89ATMRz7NDAZxWU4KDx05i5faDAID8rAwcOd6CjAChpS3xTgMEJNqspjQXW/ceRXVJLrbtO4p+3Qqw7tPDyAkF0coMZsbJVsbI3iV476N97fl6dsnB9v3HEtpQ3zUfG/ccARE6nYeqkhzcOKUvfvbyOuw8+AXunjMItzy/qt2misIsBIiw+9AXmD2sEu9u2Yfmw8dxorWt0/4ri3Ow48AxZAYDGFNXin9uaG7fZkJDOf65oRkVhVnYfei4qp3BAKFVOVnnDa/E88t34LReJXhv66kyR8owurYE72wJp+eEgjh2shUXNVXhH+v34PtTG3DRadUJz4tdkJF4+kRUA+ANAIMAbGPm4qjf9jNzFyJ6AMA7zPzfSvrDAP7OzM/E7GsegHkAUF1dPeLjjw3PMfAkg378Co4cb8HWBTM7/XbsRCv63/5y+0OSmRHAhrtmtP9eM/+lDttH9nHL8x/iiXe34brJ9fjV4o0dtvnh9L749pl9OuUVBMEfBAOEzfecbSovES1l5iYjeXR35BJRPoBnAXyPmQ/F21QlrVPNwswLmbmJmZvKyw3NIvY0R463aP7WqlSwn+w7CgA40dKmuW00kZbG7oNfdPpt35ETRk0UBMFDBNQU08nj6dmIiEIIC/4TzPyckrybiLorv3cHEHmP3Q6gKip7TwA77TE3veHOdacgCD4n
mGTV1zN6hwA8DGAtM98f9dOLAC5XPl8O4IWo9LlElEVEvQHUA3jPPpMFQRBSh4xAckfO6wm4Ng7ApQA+JKIPlLSbASwA8CciuhLANgAXAAAzryaiPwFYg/DIn2uYubXzbgX9aLfwKcmvhoIg2EuyW/p6Ru/8C+p+egCYrJHnbgB3W7BLUEHWsBeE1CMU9Jh7R/AC4ZtCNF8QUo9pA7sl9XieiKefKhw8qj7+22n0jgISBMFbvHvzZJTmZSb1mCL6NvHM0u34/p9XGM53oqUNAQIyguZfumb/5t+m8wqC4B4VhdlJP6a4d2zizY3NCbdRmwjXcOvfMfvBRKId37GzZle8aROCIAinENH3ALFT+gVBEJxCRD+JkMXxlWqjd6zuUxCE9EJE3yacld7I6B0ZvyMIgjVE9H2BiL0gCPYgou8jyOH3CUEQUh8RfUEQhDRCRN9HqPn0F76xxQVLBEHwKyL6NuHEKJqL/v/b2NJ8xPb9CoKQvojoe5h3P9qHe19d77YZgiCkECL6NpGULlYZxCMIgkVE9G1C9FgQBD8goi8IgpBGiOibhJnx+ze3YPeh8GLletw7agHXjPDJ/qOW8guCIOhZI/cRItpDRKui0v5IRB8of1sjyygSUQ0RHYv67bdOGu8mWz77HHe9tBZX//dSx4915HgLAOD9rfsdP5YgCKmNnnj6jwJ4AMDjkQRmvijymYjuA3AwavvNzNxol4FepbUt3Go//EVYkJ3syW2TDgNBEGxCzxq5bxBRjdpvFB6cfiGASfaa5X3MaLwZ7ZbQC4Ig2IlVn/4ZAHYz88aotN5EtJyI/klEZ2hlJKJ5RLSEiJY0NydegMSraPnpF6/drZ3HKWMEwSH6dStw2wTBJqyK/sUAnor6vgtANTMPA3ADgCeJqFAtIzMvZOYmZm4qLy+3aEbySTQB98rHlmDTnsOqv1nszxUEQTCNadEnogwA5wH4YySNmY8z817l81IAmwE0WDXSr3x+vFU13UhcfBmxIwiCnVhp6Z8FYB0zb48kEFE5EQWVz7UA6gGkTESwtjbGyu0HVH/T43uPtPCNtPRXbj+YeCNBEASd6Bmy+RSAtwH0JaLtRHSl8tNcdHTtAMB4ACuJaAWAZwBczcz77DTYTRa+uQXnPvBvvPfRqSIlw1MjXbmCINiFntE7F2ukf00l7VkAz1o3y5us3RVewHzngWMoycs0vR+jFYV0AQiCYBcyI9cAqm4ZI4os6i34FCdChwvuIKJvEkvPgIi/4DNE8lMHEX2bMFIJGBm9A8gDJ7hLeUGW2yYINiKibxFj3h1p4gv+Y9H14902wdN8d3I9ZjX2cNsM3Yjom0RP63vWb/6NX762wfKxpKoQ3KQ41/yghXSguiQXRTkht83QjYi+CaJdOYnCJT/+9sed0iSAmuA3pB83dRDRTxKDfvwKbn9htam88rwJXue6SX3cNkHQiYi+TSQS5iPHW/Diip2m9r1m5yFT+QTBLhK19LsWZifHEI/ip3haIvomsTJuuc3gHXJYWURFENwi0S0r7h//IKJvER9V8IJgGj+1ZJON3yo8EX2b8NuFFwTBHvxWIYroGyDetTVy4f12kwiCEB8/zcER0TdJpGEvAi6kA3Kba+O3t3wRfZPEXmi/XXhBMEKi+SiCfxDRt0ii17qTrW24669rkmSNIAhu4Kc6UUTfJrRWzjr8RQt+/6+PkmyNICQXPSvHCd5ARF8QBEEHdeV5bptgC3qWS3yEiPYQ0aqotDuIaAcRfaD8nR31201EtImI1hPRNKcMd5tIy+aTfcfE3ykIacCr109QTfdbf56elv6jAKarpP+CmRuVv78BABENQHjt3IFKngcjC6WnMm9t3uu2CYLgKNKuAQIEdCvMxq0z+3f6zU+nJ6HoM/MbAPQubj4LwNPMfJyZPwKwCcBIC/Z5iugWfXTtfryl1QVrBCF5+GkculMQEd65eTIuPK3KbVMsYcWnfy0RrVTcP12UtEoAn0Rts11J
6wQRzSOiJUS0pLm52YIZ7sPsv1c8QTCCtPRTB7Oi/xCAOgCNAHYBuE9JV5M+1duFmRcycxMzN5WXl5s0QxAEL5BqjZ7skDFp9FOlaEr0mXk3M7cycxuA3+GUC2c7gOh3n54AzMUT9jBWImwKguB91t05AzdOaVD9Lfbp99twVVOiT0Tdo77OARAZ2fMigLlElEVEvQHUA3jPmonpwdETEj5Z8C5+aMj+5pLhtu7PD2U2Q0aiDYjoKQBnAigjou0AfgzgTCJqRPi8bAXwTQBg5tVE9CcAawC0ALiGmVOyl9Puxv7V/73M3h0Kgo34YVhywF8NbtdIKPrMfLFK8sNxtr8bwN1WjPIqWre9HR2578iwT0HwBeruXe9XihFkRq4gCJZJxUa2nS83P5010L6dWURE3wCO3tip+NQIHTh3aA+3TRAMoHdugp63/OwM78xRFdE3iYzgEYySEfTvPeMH50XQZqd+m0ahY4/CfOqt4AaNET9eatSJ6AuCkJC8zITdf66TFbK3NW2m87ogW/08hTxU4YvoGyD+LWDtonrnlhCEzlQUZrltQkLsfoZatZr6sccV944gCOlGtPBVl+TiiW+McsWGPl3zbdtfq0ZL3++eXRF9G/CDv1MQnCRaH4mAcX3KXLHj6XmjkRm0R9Z8MDXBFCL6JlCr6P1e+wuCXbgllgRCWX4WxtSV2rK/Np3uHSBxmb1Uf4joewSpNAQ/46X7tzg3ZMt+NN07Pu+BE9E3id3T0v1+IwmJycrw7+Nm5HZ3qwKIHPfO2YNs2V+2gdFAflpvwL93oRv457oKHmT6oO4YWVPithkpS6SuKcwOobGq2PL+rptUrz3u3iBe6h8Q0bcBPwSjEtwnSIRrJ/Vx2wxTeMl9o4nNNuZkBnHd5PrOh4lznByb5wo4gYi+TfjhmRCEVMYLLlI/VI4i+iaIvbB2hGTww80iWMev74SJXmajBVdu5c54yeef9qL/X+98jGXb9hvOF/0QiHtH0INU7M4SfX6tPJGT+3U1eFzylM8+Ed4PqOEwt/0lvOjX1gUzXbVD9EAQrOEFf7oXXEyJ8H1Lf+nH+/CPdbvdNkMQdOF9SfAv/boXuG2CJl56E0go+kT0CBHtIaJVUWk/J6J1RLSSiJ4nomIlvYaIjhHRB8rfb500HgDOf+htXPHoEqcPkxB5dRcSIbeIs2TZFNQs1Z9lPS39RwFMj0lbBGAQMw8BsAHATVG/bWbmRuXvanvMTB5tbYyn39uGEy1tuvPYUYlLfP40IJUvcVTZvHAvXza6l+5tM4MBdC/Kbv+esNNapXgeasgnJKHoM/MbAPbFpL3KzC3K13cA9HTANld4YcUOzH/uQzz4+iZD+az68lra9Fcygn/xkzhY5ftTG5CX6Y6ffXDPIt3bbrh7BvKzzHdvEqIqCg0Z8NJ1t8OnfwWAv0d9701Ey4non0R0hlYmIppHREuIaElzc7MNZtjDwaMnAQD7Pz9hKJ/VIVlfnBTRF1KLayfVY/VPY50E3oRI/bMevCToerAk+kR0C4AWAE8oSbsAVDPzMAA3AHiSiArV8jLzQmZuYuam8vJyK2bYwrETrQm38dJYW8F/+GFkhxH6dTPfcerlOEQJI2b6XAZMn3kiuhzAOQC+wspAdWY+zsx7lc9LAWwGYE/wCof5wTMrNH9bvm0/Wlrjt8RT7YEWBCcZ0KPQ0WHSyRRm0vgcjZfm8pgSfSKaDuBHAM5l5qNR6eVEFFQ+1wKoB7DFDkOdZu2uQ6rpq3cexJwH38LPX12vmddD11NIAsOqzQfzStWmgR7hi8Zrz0y8Rtuo3qkVJC9h7wURPQXgTABlRLQdwI8RHq2TBWCR0lP/jjJSZzyAnxJRC4BWAFcz8z7VHXsMrREHnx0J+/bX7DzUvugxwV8z8ARv4IFBLaZJdLv7/XGId22evGp03PVytfKeN6wSzy3fAQDoa8EVZjd6Ru9czMzdmTnEzD2Z+WFm7sPMVbFDM5n5WWYeyMxD
mXk4M/+P80VwlqByRdviqDyRvx9oITn0KMrxvTj+59xGW/bjp+clGCBkxumDYNbo74sqY79uhVhx+1QHrDOOd3tTXCL20gWUCxdvRKW0+oVEvHvzZFSX5rpqQ79uBZjQYG3QRF6munPAioZHj5GPZsmtZ1nYa3ysDNEMBrQH6sebo1Bk04peVklr0f9kX3t3hOZNG7mI0UunySgewSgVherClkxG15aajk/jVMP8/VvOwqIbJqj+VpafZXq/8Z7RJbeehbdummR636GYhdf99NYCpLno7zhwrFNa7PWL1OrM3N7aD/jtKgsCgItHVpvOm6iZY3QWbqQNVV6QZanVrZfospflZ6Ew2xutbjdIa9GPFm+te7bdvcOn/PoBImntC77DjraK1j68NCQR6DwaJzuUHKnzQ3MwrUVfTwwNiurIbWv326nsy17ThBTFzfuEkCTXpI5COlkBAZ3LmWgejRfiBSWLtBZ9PUS39CNVQmf3jrdaOYLgFNEN+mifu99Fs8blTvZkktai33FCCXVKA04JfFvbqZa+Wud9OvCDaX3dNsH3uNk8YNgz0uzxK0bi9nMG4Btn9Fb9PVmTs+Idx+j+f37BUAt2kKnrGgq6IyRpvXKWnsZJpCM37N4JX9pvP7EMfbrmO2maJ5nQUI6fv6I9Mzkd6JUiLcL5M/phSGURLvn9u4byEQHjG8oxvqFc04/vxffeRG4tq53JXXIzAQB5WfpGRz12xUjUluVZOqZZ0lr01doKnX364f+rd54K03C8pa3DdyF9aKgowGdHjrtthmVqSnMxtk+ZpX1YcenY49On9ib9M1ePsX3/Rvjh9L7oXZaLaQO76dre6nwJK6S3e0clnOpP/mdNh230DM9k9r9PU9DHr+YOc9sEz5OsJyH6OE01HePjxL6EOB0QMTsUxKVjanyhA2kh+oe+OIk5D/4bH332eYd0PZfHB9dQSCI5FhcF8c7tZHRcfXz3iHfKJSQiLUT/tTW7sXzbAfxq8cYO6XbVys8u22HLfoTU4+/fPQO3nzPAbTMARIZsegO9Ha1nD9Z2l2QY6Ah1cqiq3xqGaSH6Wtg1yuC1tbst2+IH/HZze4H+3QtxxemnRrm4PXongtFr6Zbb4qtx1rr97VdHANCO3SOok9YduYnu49+/uaVTnA1BEOKjp4Kwow6pLgmPpNLlphUHVDspLfrLt+3H6p2HkBvjh2Vm3L9oA4b0jL8Yxl0vrXXSPN8hD0764laYhXj3XLzKxWNRITxFSjdj5zz4Fm79y6pON8CHOw7i1//YhO89vbw9zQ+97oJgFbNiGMmm9ZgYXkzcRlF249m9/0Lzk7ncJqVFX4vIKjjHW7SD5Bu9kaTOEPyE3berGy1rI2WwuyP3vOE9MV0Zk++3Rz+h6BPRI0S0h4hWRaWVENEiItqo/O8S9dtNRLSJiNYT0TSnDLdCRNDjBmwyeBfL66R/uW5yfdKO5b5AeONGdTrgmqCNnpb+owCmx6TNB7CYmesBLFa+g4gGAJgLYKCS58HIQulegGL+Rwu13D/pyw1TGkzle/ZbYzG2rtRma7yNlo+9w0RHO48XZ2dG+pikP+oUetbIfQNA7OLmswA8pnx+DMDsqPSnmfk4M38EYBOAkTbZ6ijSakiMnCPreKOdbRwvv8mqh0j3sMEuY9anX8HMuwBA+d9VSa8E8EnUdtuVtE4Q0TwiWkJES5qbm02a4RxGffrpMFZfRD91MLzSVXtGrf1ZMscSbh3brxWL3R25aqdf9cww80JmbmLmpvJyZ4MPaQVRi5vHYNPmwNGThrYXhFTCy28CgH8F2gnMiv5uIuoOAMr/PUr6dgBVUdv1BLDTvHnOEm+G4ppdxqJoSiNY8ANeF+dYtJ6rrQtmui7kfu0nMCv6LwK4XPl8OYAXotLnElEWEfUGUA/gPWsm2k/kYmXEWQ3l/a37k2WO4Ft8pqBRGJWryJuvVj61iLV2oGcClpr4JiPKptuVjln0DNl8CsDbAPoS
0XYiuhLAAgBTiGgjgCnKdzDzagB/ArAGwMsArmHmVqeMN0zMdQ/auATW4eMttu1L8A+GY9g4Y0bSsGsilNYbx5QBFYb35XZfk9vHN0rCMAzMfLHGT5M1tr8bwN1WjHKMmIXNOw7Z9NmVcwE5R7GQYXdJZPPasjxsiQn1LRjD7Xa231xlEdJyRm4Ev76euYXfWjReJivkzvQVq3e8XbeArS4g+3aVFqSX6Me5O1rapAIQjGNWvNwWKicrcDvfCO2cNW83fm0EpZfoxxB9z6w1OFpHEPyIWaFMlC1a6JP9Bu1WsERx7/iISx9+F4D7PkEhfXHj3rNDpJKtr/EO553n119N/rQQ/e//eUWH7/sjE6m8c9cIaYK/5OEURlrvye7w17UCnjzs7aSF6Av24FfBsotbZ/Z32wRL2DGW3ksjuPzqXnGbtBb9E63a8fSFzvi148ouqpTl+boVhtdkzTExAsdtnXL7+I6g4770UmXlNim9XGIscuEFO/iPLw/B1IEVGNCj0PQ+3L4TjT4LCTtyDRaoNC9T13bx96ttVDLeAvxagaZsS//2F1appk//5RtJtkRIFSL6U5gdwpxhPa3tywXVZ7YuhnbZfe8F1pcbPBWGwV389gacsqL/+Nsfq6av+/Rwki0RUgW/tuzswEhloUcEi3P1tfT1Ha/zAf0mxMkkpUT/jhdX40/vf5J4Q8FX9CrNddsE23GrE9KqGPpFS6WTV5uU8uk/+tZWAMCFp1XF31DwDXOGVWLFJwfcNgOAuuAZ9Y27LZpeF0OVNrvmtlaL8uRVo7B9/zGLe/EfKSX6Vjn0hSyEEh+3Jct7GB3/7abmdmjlG7yUdoxzz8/KwJEE0WiNHCWeT1+PvWPrygwcTfv4fiOl3DuJSHQjLPznliRZ4k/c8JMSgME9i5J/YIdxb4k/i1iwOzfT+BBXP/jmfWBiB9KqpZ+oZj520juh/wUFAoIeeaxUOwxN2ubW6B038rqBV4Zn/2h6P+RluRNRVYu0auk/v3xH3N8f/tdHSbJE8CPJiuo4sa+za0ZbIZ6YzhjUDQBwz3mDk2JL5M1drQL1StiFb51Zh8vG1LhtRgdSsqX/r42fuW2CIJjmP84fgpH3LHb0GE60gx/66oi4v+uRYSN2xVsuUdDGtOgTUV8Af4xKqgVwO4BiAFcBaFbSb2bmv5m20ARfVaJoCoKd2BnC1y13ienQyjbboRc9Z9w9v7833iaMYlr0mXk9gEYAIKIggB0AngfwdQC/YOZ7bbFQvz3JPJyQJAjkmVd1v9Mx4Fry+yKkPe4N7PLpTwawmZnVp8EmgZdXferWodMGtx7aVJT8eOJZlBtKniF6cW0yWZxx+nFsivyWp4wYOrexh51m+Rq7RH8ugKeivl9LRCuJ6BEi6qKWgYjmEdESIlrS3Nystokhbv2LeqwdwT7cWqHIK6hOznLglGRlBLF1wUzb92vLIipa6TpOhJ7DX9Ckf2KlnjfAuq752LpgJhqrinXv1yh+ey4siz4RZQI4F8CflaSHANQh7PrZBeA+tXzMvJCZm5i5qbzc+miFvZ+fsLwPwZt45ZFSkxizQuq3zkctgT1nSHdbjzNlQIXhPH4TXbexo6U/A8AyZt4NAMy8m5lbmbkNwO8AjLThGKrsF6FPec6otzZrMt1p6tUFDRX5ndLNyqSXBDZehVvZJQcAMLux0pXjexk7hmxejCjXDhF1Z+Zdytc5ABzzu2zbd9SpXQseYUxdKd7YYN39Zwd2uneS1TntIY02JZK6Ru+opJXlZ2Hj3TOQEfDQCfAIllr6RJQLYAqA56KSf0ZEHxLRSgATAVxv5Rjxj+/UngUv4XaDqqmXareUKZJ9y8bG0DfbOtXK5/a1iUcoGHD0zcSv+mNJ9Jn5KDOXMvPBqLRLmXkwMw9h5nOjWv22E/DrWfcpbpxtM/Fa7CYrZN/EdSMiufon02w7rl1oPXJujqd3LY6Rl2u8OKRVGAbBfxRke2f4op3ioqcjNy/L
uveVyB67vahvXhFdvzU9fS360tJPLm6dbrsOq9ahqYfIfWanyLh965o9fmy2gco6wT2KcxLmNeXTj2Nnl7xwg2B4tX3uNyNEor92K8p25fhm8XXsnYCvqywh2TzzrbHYd+QEzrz3dbdN6cTZg7vhbx86P8HQaAdyVUl8Mb96fB3G15djUGXyw1/37JKLl793BmrLzFXmVvnOpHpMGVCBgT38Ffrb17Lpt7HOgjnsamAXZodQU5ZnOr+TrfPqEvN2xUPrGdH77PzPtacD0A5zEgiQo4KfyM5+3QqRmWGPjP34SwMMbR8MkO8EH/C56MtoLCEZODECJFZDnapQrAwN7V2W12kB82S7pdRi0VfqcCWZoapL6q3FrIavRd9LE0XSgXR/s3LydvP6mbXnbcv4XmrL8/GHr5/W/v1P3xyDv1wzzhZr0hVf+/RF871JXmYQn59InVXInLjNYu9dx4YzxlhvtjO6oiDcWZkdSv4Q2ol9u7Z/Htm7JOnHTzV8LfoyesebDOlZjLe37LVtf25f5WTcZsl+izJapp9dMMSGTku3r6QA+N2947YBaYbd4tdd51A3t4djO9LSj/1u8CCja821ePW09O+9YGintMLsEM4f0dPUMaOObjG/YAfS0hdsJ3rtUq9MoPEDj18x0hOxYoZVh8MQu2+J4AT+bunLXelpfnlRY9zf/VIhJMNMAjC+oRxj+zgXVZTB8szEIahUuBnB1D5Jvm7pyw3sTSJvYOUFWehRlI2dB79w2SJ7sNPv3qkicTIwWJTdfqlo3WB8Qznmja/FvPG1bpviKL5u6Yt7x5vce8FQXDamF0b1Lo27nVy+Uxg5FffMGWxIvNXG6keOpxbQzqmKwesVTjBAuPns/ijLz3LbFEcR0Rdsp0dxDn46a1D767LQGStn5pJR1aYOpLbIvN0rXwnex9eiL5rvb7ze8ksmSb+XleOpXYNUC4EsdEREXxDcwMM3b7q6d9IFf4u+DCoTkkA8sbIrFIhT93L0XlV9+/EOK49XSmJ1ucStytKIHxDREiWthIgWEdFG5b9jwa7FZZxczOhbSjXuVMqvFn2yVk8kz5h8zgVciz96R1rf6YcdLf2JzNzIzE3K9/kAFjNzPYDFyndHkI7c9OArRtO3kawAABK7SURBVDouPUBpfmbijWJIdCdPaCjv8N2IVquP3pFnJ11xwr0zC8BjyufHAMx24BgAPO0WFWykqaYEWxfMdO34l43pBeDUKlG2EHPzJrqXv2w5BEJk9I4enGn+y0uFN7Aq+gzgVSJaSkTzlLSKyGLoyv+uahmJaB4RLSGiJc3NzaYOLqGVnSMZp9ZKrPdkMrl/BbYumImuBZ1jBdnm00+wH7uWNxQEq6I/jpmHA5gB4BoiGq83IzMvZOYmZm4qLy9PnEEF0XznuHik/S6VG6Y02L5Pp/nz1WPi/q61olQiLN+6jtaXzncqC+5hSfSZeafyfw+A5wGMBLCbiLoDgPJ/j1UjtRCffkfuMLjcW7JJ9U7DaQMrHNu3Hecu+s1KHp30xbToE1EeERVEPgOYCmAVgBcBXK5sdjmAF6waqYWM3nGGHjpDHhslVmhmN1YCAEIaAa6+NLSHavr6u6bbapcVIm6Z313WhCvG9bawH6MZjGwatbGuykN8+qmMlZZ+BYB/EdEKAO8BeImZXwawAMAUItoIYIry3RFkBEJH7PIvv/6Diba1yqP3E2vdj6b3w6qfTENupnrcv19c2DmuOwBkZSR/9aZEhIJk6PzHnt6kL6Ji0zaC/zAdZZOZtwDo9FQy814Ak60YpRd5RXWGzAz1toDViiD2egUChPws9VswMxhARtBYm2RsXSne2mxsxS673hat1pGJ7uVOv1sMuCakL74OrSw+feew69RG70erJayWbFSo3rt5MgpzQuh328sYXVuCd7bsS5hn+W1TEHQpdnrsUeu75jtznJiAa3pwLgyDVD5ewNeiL5rvHE64d5yka2G4H+LNH05EWX4W+t/+
sua2799yFoIBQpc845OotLByK756/Xg0VBTYZosW0uIXAJ/H3pGWvr/QulyNVcWdtzUpo1UluchRiREfTXlBFkpsFHwg7G0x0pKNPhdOC36HMAw6hN+5KJvxd5wT8l5fjRlundnfbRPi4mvRF8nviNN1YCIxTYSWkP/mkuE4b3ilpX27hdlT7ranw2sTGxddPx5v/HCi22bYQnGuvQ0Ku/G36Hvrvk15tDpd46FnbHheVgaGVBYZ2u+L144zbIte3po/CXk6K7ho7bYyeidZ6Kls3PDp11cUoLwgtVes8go+9+mL6jtH5wc02+Lrt51Xa0jPzi4hu+hRnIN+3Qux9OP9zr1NWlRWu/zz8faSas/XazeMx4kW6dfwtegL3ifapeNHDUkkEabdOybzmTuWv4TutRsmYP/RE7bvt09X5zvL/YCIvuAofhOcVEOrH8XLFXAfh4avCmFE9FMIDz/HAKzPOv3rd07H7kNf2GSNPhJZHOnnCAUCyFDG/OdlZeCZq8fgeEubZj693p1+3Qqw7tPDnc6dkf6VjqN3hHRHRF9IGtGty8evGGk4/6DKIgwy2OGrlwcuGYZrn1xuON/dcwZhQI9CjOtTCgCYP6MfLhjRE6X58Tsl23Sqfm15HtZ9erjTG9O9FwzFn5dux4K/rzNsM+D9BoLgHL4eveN1nrpqdHIPmOCd/dGvn5YkQ04R0gilML5BPZx2+1KDBlRp4aUjjJrViXOGdAzuFgkCl8gNUpybiWsm9gFROPbO1RPqEgo+oL+lr/V2VJqfhasn1Gnm0xorrrdcQuoiou8gY+pKMbauNHkHjKMkt50zALVlyfeVRrfo9YwGOa2mJPxBhyj+59xGLLx0BKYO7GbWPE1+cVEj5o2vxbAqx5Z41oXZPpEB3dVX+dJTLqdcQOniWvJ6fSqinyZcebqxsL92jdWuLc/H18bWJNzOzPDAWY2Vjgg+AHQvysHNZ/dHwOb43ePqSvGVUdVYcP5gQ/mM9odoXT4j5fK6eHmVmUO6Y+5pVbjlbG/OzBWffirh8Xd2Q/rp7aKYJiMYwN1zBmNPkjqkiYBfXzIMv319M+o9MmTxJ+cONJznB9P6orok1wFr7Cc7FMSC84e4bYYm0tIXTHPOkO66tot0WsbTcaciMJ5RX+bIfq2ix+9vFw0VBbj/okYEPbLq0Lkai+PE45qJfTQX1RGMIaKfQqg90mf1r8DPv+xMq0MtUJoaET13Y4bnf105KunH1INXBFhIP0T0DWBGs26c6u5i4AsvHYELmqps2993J9ebzhvv/Hlhyv/XxtbgIhvPlR18+8w+KMvPNDwgYEjPIlQUZiVcjP4bZ/RGqc0RRwVtzh3aI+6oq2Rg2qdPRFUAHgfQDUAbgIXM/J9EdAeAqwA0K5vezMx/s2qoFzDjgRjRq8R+Q3yGkREods3gnTqgAkU5IUN57jDha3aaQZVFWHLrFMP5CrJDePfmsxJu169bIZbeNgU1818yY55gkF9dPMxtEyx15LYAuJGZlykLpC8lokXKb79g5nutm+csjVXF+OCTA7q3b6jIx4bdR0wfLysjEHeWph8w0yBvd+/Ya0pcFl7W1P55QkO5bcsipgN2d6/UludhXF0ZXlyx094dC6Yw7d5h5l3MvEz5fBjAWgCeC4peWZyj+dvI3tqt8GHVxdi6YCaGRvmttWaDjqnV9+r9/84bjK0LZura1m30PPgTNCZYddpX5IOOGsOJBcIfu2Ik/vB14zOA/Ybe66FG96LsTml2edz+ceOZuHP2INv3K5jDFp8+EdUAGAbgXSXpWiJaSUSPEJHqLBAimkdES4hoSXNzs9omlnnthglYfOMEzd/jjRi5ZGQ1AOCFa8bhji8NAKAe72TLPWfjqXmdZ96OUqlQ9IwpH9enFHXleQm3UyPZD1O9yopPquvdGmjpS4A2d3j7pslumyAkCcuiT0T5AJ4F8D1mPgTgIQB1ABoB7AJwn1o+Zl7IzE3M3FRebr6FEi/w
FFH8FmtbnN+iOz8jE1nU4qVoCe0fvzmmU5reIFle6NRMxHUaHboNKmPBpw6sAACM6OXu7FZBECyKPhGFEBb8J5j5OQBg5t3M3MrMbQB+B8DR9+pVP5mmbV+CvFoVQuzY7ogIM6OTe8aMQGsJphF6dtF2W0UTbV6sqZkacXHsZmLfrti6YCb6a4QGEOxh+iBnZicLqYXpp57CavcwgLXMfH9UevSMnTkAVpk3zxrhIFjav2u5En46a1CH74MVX/6YmGFz6++arsuOzGDAE37M2EruyatGYcNdM3TlzcqIf6tElhc0KzyR8+NGp28qsO7O6Zh7mreGmwrexMronXEALgXwIRF9oKTdDOBiImpEuP9uK4BvWrLQAgGKPwlGq6VfFdOKbqwqxorbp6Iot+MQwKwMfcsHrrxjqq7tkk0gQMhUxHxsXSne2rxXc9t75sSPFdMlLxNv3XQGCkysowu4v1C4XxhZoz74wOpSlkL6YFr0mflfUG+QeWZMfmZGAKFgAEtvPQsj7npNd76ASrM8VvCNEPtADuzhjJsj0cgXrbeN5bdNQW5WEH1vfbnTb/27F2LtrkOo01jNKLLIR4DI8Lh4NTIUl9NZ/Sss7yvVWH7bFOToXLDdLA0VsmpVqpPSAdciPuvoOCeF2Rk49EULgLBgqZHIFTO0ZxFWbD9o2q5pA7shNzOIoyda0bssDx999rnpfVkhUswuKjMyI66vy8f0wpi6UvQqVR9R9Mu5jZj+yzdtCyuQESC8NX8SSvNllmgsatdJjcEmF5p575bJyMvMwCf7j5rKnwin4isJxkht0VfxQ4/sXYLX1u7Bc98ei2FVxbj31Q347MjxDtsk6px98qrR2HvE2sLN1SW5WPfpYQyqLHJN9GMfwfdvUZ/BGS34vZVFTiKLndjVGRx9ynvEmVshdKQ0LxN7Pz91L77xg4koMVlhdi3oOFbfiTkTTu5X0Efaif6vLx6OrXs/bx9Jsuj68Rh256JO28UjLysDeSZ917FcPaEW2/cfxfJt4ZnBpPORGFxZhO37j3VIixbO4dXFWLZN/2xjACgvSBz58dyhPVBdkqs72JqgzTs3Tda9bKIWr90wAQeOnWz/Xl3qj/DDgnukdMA1tVZoTmaww9BBva/MThEgwtCeHQW0QZn09MeoSV+Lb5yAt+ZPav9+34VD21/juypiXRblxnr0ipF48dpxlsf8x2YnIgyr7tJpv1Zf3SNlqNQ5FDUV6FaUbfmtpkteZvvbl13kZYYbNH00+nHMEnnugkFp6btJSrf0vTzJKZ5G/uzLQ3DJqGqMigrvUFfe8QHMzczA8OpifLjjIL45oQ69y3IxsW9XvHr9eGQGAyjMDmFITGUSO+wy3tnpVhQWo+Lc+JWiXed42sBuePjyJpzZt6st+xPMU1WSiye+Mcr2t7mFlzVh7a5DuicpCs6Qcme/uiQX2/Y50xFlJ5GO0ljN7FqYhbysDIzro3/xjyABk/qFR7s0qIRGOLXvbPzha6fhzr+uwZYE/QjfmdQHfSsKMHWAvaNo/vqd05Ed6vwGRkSYLCN2PIOR+08vRTkhjNYZp0pwjpQT/QABb/5wYtI7R//6ndPR2saY9Zt/69r+1CQkQrEyHHRsXSnujJkYFsviGydgz6Fwx3OR0go30r8wsV9XPPHuNmz57PO48wxCwQBm6lgZK6S8qsd2AmqhFbROEITkkHKiX5KXiaqSXFQZWE/zpetOx+vrm/HzV9abPq5RMYv0JYSChG+f2QcleZn4yqheCYc+1pXnt7t6rplYh7L8TJw/vKehY993wVC8svpTDLBhvkDPLrm474KhOLOv+fhJgiAkj5QS/TtnD8K0gcZdBAN7FGFgjyJLoh/hxWvHtbfE4/GbS4bj5VW7UKsI+GVjagwfKysjaCpfUW4IF9o4Zf/8EcYqHUEQ3CMlRP/Zb43FroPHcM4Q7YWTf3XxsIRByv523RlYs+uQJVtiO0+1KC/IwqUmBFsQBMEKKSH6
4ZC98cP2njtUu0KIMKBHoS0uDzu5Z85g9O+u3TkrCIJghJQQ/VTmklHVbpsgCEIKkdKTswRBEISOiOgLgiCkESL6giAIaYSIviAIQhohoi8IgpBGiOgLgiCkESL6giAIaYSIviAIQhpBXli3koiaAXxsYRdlAD6zyRwvIOXxNlIe75NqZdIqTy9mNhTt0BOibxUiWsLMTW7bYRdSHm8j5fE+qVYmO8sj7h1BEIQ0QkRfEAQhjUgV0V/otgE2I+XxNlIe75NqZbKtPCnh0xcEQRD0kSotfUEQBEEHIvqCIAhphK9Fn4imE9F6ItpERPPdtkcvRLSViD4kog+IaImSVkJEi4hoo/K/S9T2NyllXE9E09yzvN2eR4hoDxGtikozbD8RjVDOwyYi+hURxV8V3kE0ynQHEe1QrtMHRHR21G+eLRMRVRHR/xLRWiJaTUTfVdJ9e43ilMmv1yibiN4johVKeX6ipDt/jZjZl38AggA2A6gFkAlgBYABbtul0/atAMpi0n4GYL7yeT6A/1A+D1DKlgWgt1LmoMv2jwcwHMAqK/YDeA/AGAAE4O8AZnisTHcA+L7Ktp4uE4DuAIYrnwsAbFBs9u01ilMmv14jApCvfA4BeBfA6GRcIz+39EcC2MTMW5j5BICnAcxy2SYrzALwmPL5MQCzo9KfZubjzPwRgE0Il901mPkNAPtikg3ZT0TdARQy89scvnMfj8qTdDTKpIWny8TMu5h5mfL5MIC1ACrh42sUp0xaeLpMHOaI8jWk/DGScI38LPqVAD6J+r4d8W8CL8EAXiWipUQ0T0mrYOZdQPgGB9BVSfdLOY3aX6l8jk33GtcS0UrF/RN51fZNmYioBsAwhFuSKXGNYsoE+PQaEVGQiD4AsAfAImZOyjXys+ir+a38Mv50HDMPBzADwDVEND7Otn4uJ6Btvx/K9RCAOgCNAHYBuE9J90WZiCgfwLMAvsfMh+JtqpLmufIAqmXy7TVi5lZmbgTQE+FW+6A4m9tWHj+L/nYAVVHfewLY6ZIthmDmncr/PQCeR9hds1t5VYPyf4+yuV/KadT+7crn2HTPwMy7lQezDcDvcMqt5vkyEVEIYXF8gpmfU5J9fY3UyuTnaxSBmQ8AeB3AdCThGvlZ9N8HUE9EvYkoE8BcAC+6bFNCiCiPiAoinwFMBbAKYdsvVza7HMALyucXAcwloiwi6g2gHuGOG69hyH7l1fUwEY1WRhtcFpXHE0QePoU5CF8nwONlUo79MIC1zHx/1E++vUZaZfLxNSonomLlcw6AswCsQzKuUbJ7re38A3A2wr34mwHc4rY9Om2uRbgXfgWA1RG7AZQCWAxgo/K/JCrPLUoZ18PFES5R9jyF8Kv0SYRbGleasR9AE8IP6WYAD0CZIe6hMv0XgA8BrFQeuu5+KBOA0xF+xV8J4APl72w/X6M4ZfLrNRoCYLli9yoAtyvpjl8jCcMgCIKQRvjZvSMIgiAYRERfEAQhjRDRFwRBSCNE9AVBENIIEX1BEIQ0QkRfEAQhjRDRFwRBSCP+D7vyQL47HopGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "# Train with no fixed episode budget: keep playing training episodes until\n",
    "# the mean reward over the 20 most recent episodes exceeds 199.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "episode = 0\n",
    "while True:\n",
    "    # Step the unwrapped env and let play_episode enforce the step cap\n",
    "    # taken from env._max_episode_steps.\n",
    "    episode_reward, elapsed_steps = play_episode(\n",
    "            env.unwrapped, agent,\n",
    "            max_episode_steps=env._max_episode_steps, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "            'train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-20:]) > 199:\n",
    "        break\n",
    "    episode += 1\n",
    "# Learning curve: the reward of every training episode, in order.\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# Evaluate the agent: 100 episodes on the wrapped env using play_episode's\n",
    "# defaults (mode=None, no explicit step cap, no rendering).\n",
    "logging.info('==== test ====')\n",
    "# Rebinding the name drops the training rewards, which were plotted above.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "# Report mean ± standard deviation of the test episode rewards.\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Close the environment once training and evaluation are finished.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
